FAQ
Hi All ,

Could anyone help me get the max and min values from a specified column of a
CSV file? The *CSV file is large*, and the code below did not work well on it.
*Alternative methods would be highly appreciated.*

*minimum.py:*
import csv

filename = "testLog_4.csv"


def col_min(mincname):
    """Print the smallest value in the column whose header is *mincname*.

    The file named by the module-level ``filename`` is read one row at a
    time, so the whole file is never held in memory.  Cells are compared
    numerically (through ``float``) rather than as text, because a text
    comparison ranks "10" below "9".  The winning cell is printed exactly
    as it is written in the file.

    Raises:
        ValueError: *mincname* is not in the header row, a cell in the
            column is not a number, or the file has no data rows.
        IndexError: a data row has fewer cells than the header.
    """
    # Python 2's csv module expects a binary-mode file; on Python 3 open
    # the file with open(filename, newline="") instead.
    with open(filename, 'rb') as stream:
        reader = csv.reader(stream, delimiter=",")
        col_index = next(reader).index(mincname)
        least = min((rec[col_index] for rec in reader), key=float)
    print(least)


col_min("Server Latency")
col_min("Client Latency")

*maximum.py:*
import csv

filename = "testLog_4.csv"


def col_max(maxcname):
    """Print the largest value in the column whose header is *maxcname*.

    The file named by the module-level ``filename`` is read one row at a
    time, so the whole file is never held in memory.  Cells are compared
    numerically (through ``float``) rather than as text, because a text
    comparison ranks "9" above "10".  The winning cell is printed exactly
    as it is written in the file.

    Raises:
        ValueError: *maxcname* is not in the header row, a cell in the
            column is not a number, or the file has no data rows.
        IndexError: a data row has fewer cells than the header.
    """
    # Python 2's csv module expects a binary-mode file; on Python 3 open
    # the file with open(filename, newline="") instead.
    with open(filename, 'rb') as stream:
        reader = csv.reader(stream, delimiter=",")
        col_index = next(reader).index(maxcname)
        highest = max((rec[col_index] for rec in reader), key=float)
    print(highest)


col_max("Server Latency")
col_max("Client Latency")


Thanks
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mail.python.org/pipermail/python-list/attachments/20110707/843a7163/attachment.html>

Search Discussions

  • John [H2O] at Jul 7, 2011 at 10:38 am
    I think you would benefit from reading the data into a numpy array first,
    then using numpy min, max functions.



    prakash jp wrote:
    Hi All ,

    Could anyone help me get the max and min values from a specified column of a
    CSV file? The *CSV file is large*, and the code below did not work well on it.
    *Alternative methods would be highly appreciated.*

    *minimum.py:*
    import csv

    filename = "testLog_4.csv"


    def col_min(mincname):
        """Print the smallest value in the column whose header is *mincname*.

        The file named by the module-level ``filename`` is read one row at a
        time, so the whole file is never held in memory.  Cells are compared
        numerically (through ``float``) rather than as text, because a text
        comparison ranks "10" below "9".  The winning cell is printed exactly
        as it is written in the file.

        Raises:
            ValueError: *mincname* is not in the header row, a cell in the
                column is not a number, or the file has no data rows.
            IndexError: a data row has fewer cells than the header.
        """
        # Python 2's csv module expects a binary-mode file; on Python 3 open
        # the file with open(filename, newline="") instead.
        with open(filename, 'rb') as stream:
            reader = csv.reader(stream, delimiter=",")
            col_index = next(reader).index(mincname)
            least = min((rec[col_index] for rec in reader), key=float)
        print(least)


    col_min("Server Latency")
    col_min("Client Latency")

    *maximum.py:*
    import csv

    filename = "testLog_4.csv"


    def col_max(maxcname):
        """Print the largest value in the column whose header is *maxcname*.

        The file named by the module-level ``filename`` is read one row at a
        time, so the whole file is never held in memory.  Cells are compared
        numerically (through ``float``) rather than as text, because a text
        comparison ranks "9" above "10".  The winning cell is printed exactly
        as it is written in the file.

        Raises:
            ValueError: *maxcname* is not in the header row, a cell in the
                column is not a number, or the file has no data rows.
            IndexError: a data row has fewer cells than the header.
        """
        # Python 2's csv module expects a binary-mode file; on Python 3 open
        # the file with open(filename, newline="") instead.
        with open(filename, 'rb') as stream:
            reader = csv.reader(stream, delimiter=",")
            col_index = next(reader).index(maxcname)
            highest = max((rec[col_index] for rec in reader), key=float)
        print(highest)


    col_max("Server Latency")
    col_max("Client Latency")


    Thanks

    --
    http://mail.python.org/mailman/listinfo/python-list
    --
    View this message in context: http://old.nabble.com/find-max-and-min-values-from-a-column-of-a-csv-file-tp32011861p32012126.html
    Sent from the Python - python-list mailing list archive at Nabble.com.
  • Prakash jp at Jul 19, 2011 at 9:12 am
    Thanks for the suggestions. I felt it would be helpful to consolidate the
    solution in this thread.

    *Max Value from a csv column:*

    import numpy

    # Read one numeric column of the CSV file, skipping the single header line.
    # usecols holds the zero-based index of the wanted column; 0 is only a
    # placeholder here, so set it to the column you need.
    # usemask=True asks genfromtxt for a masked array (see the numpy docs for
    # how missing cells are treated).
    data1 = numpy.genfromtxt(
        "data.csv",
        dtype=float,
        delimiter=",",
        skip_header=1,
        usecols=(0,),
        usemask=True,
    )
    print(data1.max())

    Please go through the link below to learn more about the arguments taken
    by numpy.genfromtxt():

    http://docs.scipy.org/doc/numpy-1.4.x/reference/generated/numpy.genfromtxt.html




    Thanks
    -------------- next part --------------
    An HTML attachment was scrubbed...
    URL: <http://mail.python.org/pipermail/python-list/attachments/20110719/18ba510c/attachment.html>
  • Peter Otten at Jul 7, 2011 at 12:33 pm

    prakash jp wrote:

    Could anyone help me get the max and min values from a specified column of a
    CSV file? The *CSV file is large*, and the code below did not work well on it.
    *Alternative methods would be highly appreciated.*

    *minimum.py:*
    import csv

    filename = "testLog_4.csv"


    def col_min(mincname):
        """Print the smallest value in the column whose header is *mincname*.

        The file named by the module-level ``filename`` is read one row at a
        time, so the whole file is never held in memory.  Cells are compared
        numerically (through ``float``) rather than as text, because a text
        comparison ranks "10" below "9".  The winning cell is printed exactly
        as it is written in the file.

        Raises:
            ValueError: *mincname* is not in the header row, a cell in the
                column is not a number, or the file has no data rows.
            IndexError: a data row has fewer cells than the header.
        """
        # Python 2's csv module expects a binary-mode file; on Python 3 open
        # the file with open(filename, newline="") instead.
        with open(filename, 'rb') as stream:
            reader = csv.reader(stream, delimiter=",")
            col_index = next(reader).index(mincname)
            least = min((rec[col_index] for rec in reader), key=float)
        print(least)


    col_min("Server Latency")
    col_min("Client Latency")

    I don't see a way to make this much faster, as it is probably already
    I/O-bound. You can try to calculate all four extrema at once, so that you
    only have to iterate over the file once:



    $ cat csv_minmax_chunked.py
    #!/usr/bin/env python
    import csv
    import locale
    from itertools import islice

    def make_key(index, type):
        """Return a function that maps a row to ``type(row[index])``."""
        def key(row):
            return type(row[index])
        return key


    class Aggregator(object):
        """Running minimum and maximum of one column, fed in chunks of rows.

        Args:
            name: label used by ``__str__`` and ``__repr__``.
            index: position of the column inside each row.
            type: callable applied to every cell before comparison; a falsy
                value selects ``float``.
            key: optional ordering function passed to ``min``/``max`` as
                their ``key`` argument.

        The ``min`` and ``max`` attributes do not exist until ``update`` has
        seen at least one row.
        """

        def __init__(self, name, index, type, key=None):
            self.name = name
            self.index = index
            self.type = type or float
            # Build the extractor from self.type so the float fallback is
            # used for the conversion too, not just stored on the instance.
            self.getvalue = make_key(index, self.type)
            self.key = {"key": key} if key is not None else {}

        def __repr__(self):
            return "{0}(name={1}, index={2}, type={3})".format(
                type(self).__name__, self.name, self.index, self.type)

        def __str__(self):
            try:
                return "%s: %s...%s" % (self.name, self.min, self.max)
            except AttributeError:
                return "%s: (no values)" % self.name

        def update(self, rows):
            """Fold the converted cells of *rows* into the running extrema.

            *rows* may be any iterable of rows; an empty one is ignored.
            """
            # A real list is required: the values are scanned twice, and
            # map() would be exhausted after min() on Python 3.
            values = [self.getvalue(row) for row in rows]
            if not values:
                return
            chunkmin = min(values, **self.key)
            chunkmax = max(values, **self.key)
            if hasattr(self, "min"):
                # Earlier result first, so ties keep the value seen first.
                chunkmin = min(self.min, chunkmin, **self.key)
                chunkmax = max(self.max, chunkmax, **self.key)
            self.min = chunkmin
            self.max = chunkmax

    def identity(s):
        """Return *s* unchanged; used where no conversion is wanted."""
        return s


    # Maps the type suffix of a "column:type" command-line argument to the
    # positional arguments that follow name and index in an Aggregator call:
    # a converter, optionally followed by an ordering key.
    convert = {
        "float": (float,),
        "int": (int,),
        # Cells stay strings but are ordered with locale.strxfrm.
        "text": (identity, locale.strxfrm),
        "": (identity,),
    }

    def main():
        """Print the minimum and maximum of the requested CSV columns.

        Command line: ``csvfile column[:type] [column[:type] ...]`` with an
        optional ``--chunksize/-s``.  *type* must be a key of ``convert``; a
        column given without a type is compared as plain strings.  The file
        is read once, ``chunksize`` rows at a time, and every aggregator is
        updated from each chunk.

        Bad arguments (unknown type, column missing from the header row,
        non-positive chunk size) end the program through ``parser.error``.
        """
        import argparse

        locale.setlocale(locale.LC_ALL, "")

        parser = argparse.ArgumentParser()
        parser.add_argument("csvfile")
        parser.add_argument("columns", nargs="+")
        parser.add_argument("--chunksize", "-s", type=int, default=2**12)
        args = parser.parse_args()
        if args.chunksize < 1:
            parser.error("--chunksize must be a positive integer")

        # "name:type" -> ("name", "type"); a bare "name" -> ("name", "").
        ntpairs = [column.partition(":")[::2] for column in args.columns]
        for name, typename in ntpairs:
            if typename not in convert:
                parser.error("unknown type %r for column %r (choose from: %s)" % (
                    typename, name, ", ".join(sorted(t for t in convert if t))))

        # Python 2's csv module expects a binary-mode file; on Python 3 open
        # the file with open(args.csvfile, newline="") instead.
        with open(args.csvfile, "rb") as instream:
            rows = csv.reader(instream)
            # An empty file yields an empty header, so every requested
            # column is reported as missing below.
            header = dict((name, index) for index, name in
                          enumerate(next(rows, [])))
            missing = [name for name, _ in ntpairs if name not in header]
            if missing:
                parser.error("column(s) not found in %s: %s" % (
                    args.csvfile, ", ".join(missing)))
            aggregators = [
                Aggregator(
                    "{0}({1})".format(name, typename or "str"),
                    header[name],
                    *convert[typename])
                for name, typename in ntpairs]
            chunksize = args.chunksize
            while True:
                chunk = list(islice(rows, chunksize))
                if not chunk:
                    break
                for agg in aggregators:
                    agg.update(chunk)
        for agg in aggregators:
            print(agg)


    if __name__ == "__main__":
        main()
    $ head sample.csv
    alpha,beta,gamma,delta
    69,-454,460960,ELIJAH
    92,796,278885,SUFFICIENCY
    42,895,934523,PIONEERED
    88,-213,360633,reassert
    63,-518,327010,mcmahon
    84,604,540764,deprecatory
    83,117,621832,VEGETARIANISM'S
    61,411,844243,tan
    29,-147,982043,plushest
    $ ./csv_minmax_chunked.py sample.csv alpha:int beta:int beta gamma:float
    delta delta:text
    alpha(int): 0...99
    beta(int): -1000...999
    beta(str): -1...999
    gamma(float): 0.0...999997.0
    delta(str): AALIYAH...?tudes
    delta(text): a...ZYUGANOV'S

Related Discussions

Discussion Navigation
viewthread | post
Discussion Overview
grouppython-list @
categoriespython
postedJul 7, '11 at 9:58a
activeJul 19, '11 at 9:12a
posts4
users3
websitepython.org

People

Translate

site design / logo © 2022 Grokbase