|
2 | 2 | # |
3 | 3 | # Exercise 3.3 |
4 | 4 | import csv |
| 5 | +import gzip |
5 | 6 |
|
def parse_csv(source, select=None, types=None, has_headers=True, delimiter=',', silence_errors=True):
    '''
    Parse an iterable of CSV-formatted lines into a list of records.

    Parameters:
        source: any iterable yielding lines of text (an open file, a
            gzip file opened in text mode, a list of strings, ...).
            A plain string is rejected, see Raises.
        select: optional list of column names to keep. Requires
            has_headers=True. The resulting records contain only these
            columns, in the order given.
        types: optional list of callables applied position by position
            to the (possibly selected) columns of every row.
        has_headers: when true, the first row is consumed as the column
            names and each record is a dict; otherwise each record is
            a tuple.
        delimiter: field separator handed to csv.reader.
        silence_errors: when false, rows that fail type conversion are
            reported with print() before being skipped; when true they
            are skipped silently.

    Returns:
        A list of dicts (has_headers=True) or tuples (has_headers=False).
        Empty input yields an empty list.

    Raises:
        RuntimeError: if source is a string, or if select is given
            while has_headers is false.
        ValueError: if a name in select is not among the headers.
    '''
    # A str is itself iterable, but csv.reader would walk it one character
    # at a time, so a filename passed by mistake must be rejected up front.
    # isinstance (rather than an exact type comparison) also catches
    # subclasses of str.
    if isinstance(source, str):
        raise RuntimeError("source must be iterable")

    if select and not has_headers:
        raise RuntimeError("select argument requires column headers")

    rows = csv.reader(source, delimiter=delimiter)

    # Read the headers. Data rows are numbered from 2 when a header row
    # occupies row 1, otherwise from 1.
    if has_headers:
        headers = next(rows, None)
        if headers is None:
            # Empty input: there is no header row and therefore no data.
            return []
        start = 2
    else:
        headers = []
        start = 1

    # If a column selector was given, find the indices of those columns
    # and narrow the headers used to build the resulting dictionaries.
    if select:
        indices = [headers.index(colname) for colname in select]
        headers = select
    else:
        indices = []

    records = []
    for rownum, row in enumerate(rows, start=start):
        if not row:     # Skip rows with no data
            continue
        try:
            # Keep only the selected columns
            if indices:
                row = [row[index] for index in indices]

            # Convert types if a list of converters is provided
            if types:
                row = [func(val) for func, val in zip(types, row)]

        except ValueError as e:
            # A row that cannot be converted is dropped, optionally
            # reporting why.
            if not silence_errors:
                print(f"Row {rownum}: Could not convert: {row}")
                print(f"Row {rownum}: Reason: {e}\n")
            continue

        if has_headers:
            record = dict(zip(headers, row))
        else:
            record = tuple(row)
        records.append(record)

    return records
59 | 63 |
|
60 | | -with open('.\\Data\\portfolio.csv') as f: |
61 | | - d = parse_csv(f, types=[str, int, float]) |
62 | 64 |
|
63 | | -print(d) |
| 65 | + |
| 66 | +# with open('.\\Data\\portfolio.csv') as f: |
| 67 | +# d = parse_csv(f, types=[str, int, float]) |
| 68 | + |
| 69 | +# print(d) |
| 70 | + |
| 71 | + |
| 72 | + |
| 73 | +# with gzip.open('.\\Data\\portfolio.csv.gz', 'rt') as f: |
| 74 | +# d = parse_csv(f, types=[str, int, float]) |
| 75 | + |
| 76 | +# print(d) |
| 77 | + |
| 78 | + |
| 79 | + |
| 80 | +# lines = ['name,shares,price', 'AA,100,34.23', 'IBM,50,91.1', 'HPE,75,45.1'] |
| 81 | +# pf = parse_csv(lines, types=[str,int,float]) |
| 82 | +# print(pf) |
| 83 | + |
| 84 | + |
| 85 | + |
64 | 86 |
|
65 | 87 | # portfolio = parse_csv('.\\Data\\portfolio.csv', select=['name','shares'], types=[str, int]) |
66 | 88 | # print(portfolio) |
|
0 commit comments