Skip to content

Commit dfdd314

Browse files
committed
CME Updated Files
CME Rollup, Merge, Zip, Json
1 parent 3ba54c4 commit dfdd314

4 files changed

Lines changed: 199 additions & 40 deletions

File tree

CME_Json.py

Lines changed: 148 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,148 @@
1+
import pandas as pd
2+
import json
3+
import os
4+
5+
# Load the per-variable definitions from CME_Futures.json.
# JSON text is UTF-8, so the encoding is stated explicitly instead of
# relying on the platform's default text encoding.
with open('CME_Futures.json', encoding='utf-8') as f:
    variables_data = json.load(f)

# Read the contract specifications from cme_weather_specs.xlsx
specs_data = pd.read_excel('cme_weather_specs.xlsx')

# Extra code/name rows appended after the spreadsheet rows.
# NOTE(review): presumably these contracts are absent from the spreadsheet --
# confirm against cme_weather_specs.xlsx.
missing_stations_data = [
    {'Commodity Code': 'KRK', 'Contract Name': 'CME Seasonal Strip Degree Days Index Futures - Houston CDD May'},
    {'Commodity Code': 'K6', 'Contract Name': 'CME Degree Days Index Futures - Philadelphia CDD'},
    {'Commodity Code': 'KW', 'Contract Name': 'CME Degree Days Index Futures - Boston CDD'},
]
missing_data_df = pd.DataFrame(missing_stations_data)
specs_data = pd.concat([specs_data, missing_data_df], ignore_index=True)
18+
19+
# Generate stations.json and find the earliest and most recent dates
stations = {
    "type": "FeatureCollection",
    "features": []
}
earliest_date = None
most_recent_date = None

# Fields copied from each variables_data entry into every station's
# "variables" section, in the order they are written out.
variable_fields = (
    "column name",
    "plain text description",
    "unit of measurement",
    "precision",
    "na value",
)

# Map each commodity code to its contract name once, instead of rescanning
# the DataFrame for every CSV file.  setdefault keeps the first row when a
# code occurs more than once, matching a "first match" lookup.
contract_names = {}
for spec_code, contract_name in zip(specs_data['Commodity Code'], specs_data['Contract Name']):
    contract_names.setdefault(spec_code, contract_name)

# Get a list of CSV files directly under the CME_DDIF folder.  os.listdir
# returns entries in arbitrary order, so sort them to make the feature order
# in stations.json reproducible between runs.
csv_files = sorted(file for file in os.listdir('CME_DDIF') if file.endswith('.csv'))

for csv_file in csv_files:
    csv_file_path = os.path.join('CME_DDIF', csv_file)
    station_name = os.path.splitext(csv_file)[0]
    code = station_name  # Assuming station name corresponds to the code

    # Skip files whose code has no entry in specs_data
    if code not in contract_names:
        continue

    # Retrieve the description recorded for this code
    description = contract_names[code]

    merged_data = pd.read_csv(csv_file_path)

    # Compute the bounds of the 'dt' column once and reuse them below
    first_date = merged_data['dt'].min()
    last_date = merged_data['dt'].max()
    date_range = [str(first_date), str(last_date)]

    # Track the overall bounds across all processed files
    if earliest_date is None or first_date < earliest_date:
        earliest_date = first_date

    if most_recent_date is None or last_date > most_recent_date:
        most_recent_date = last_date

    station = {
        "type": "Feature",
        "properties": {
            "file name": csv_file,
            "station name": station_name,
            "description": [description],
            # Output keys keep the trailing colon ("0:", "1:", "2:") while
            # the lookups into variables_data use the bare index.
            "variables": {
                f"{index}:": {field: variables_data[index][field] for field in variable_fields}
                for index in ("0", "1", "2")
            },
            "date range": date_range
        }
    }
    stations["features"].append(station)

# Save stations.json
stations_file_path = os.path.join('CME_DDIF', 'stations.json')
with open(stations_file_path, 'w') as outfile:
    json.dump(stations, outfile, indent=4)
97+
98+
# Generate metadata.json

# Dataset-level data dictionary: one entry per variable index, each carrying
# the same five fields read from variables_data.
data_dictionary = {
    f"{index}:": {
        field: variables_data[index][field]
        for field in (
            "column name",
            "plain text description",
            "unit of measurement",
            "precision",
            "na value",
        )
    }
    for index in ("0", "1", "2")
}

# Overall date bounds, stringified for JSON output
overall_date_range = [str(earliest_date), str(most_recent_date)]

metadata = {
    "compression": None,
    "name": "cme_ddif",
    "documentation": "https://www.cmegroup.com/content/dam/cmegroup/rulebook/CME/IV/400/403/403.pdf",
    "description": "HDD, CDD, and CAT futures settlement data from the Chicago Mercantile Exchange (CME) by city",
    "publisher": "Chicago Mercantile Exchange",
    "source data url": "ftp.cmegroup.com",
    "tags": ["temperature", "Europe", "U.S", "CME"],
    "date range": overall_date_range,
    "station metadata": "stations.json",
    "previous hash": None,
    "time generated": "",
    "data dictionary": data_dictionary,
}

# Save metadata.json
metadata_file_path = os.path.join('CME_DDIF', 'metadata.json')
with open(metadata_file_path, 'w') as outfile:
    json.dump(metadata, outfile, indent=4)
148+

CME_Merged.py

Lines changed: 48 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111
input_zip_file = '/Users/maana/Documents/GitHub/dWeather-Python-Client/cme_futures_hist_by_station.zip'
1212

1313
# Output folder for merged data
14-
output_folder = 'Merge'
14+
output_folder = 'CME_DDIF'
1515

1616
# Create a dictionary to store the station data
1717
station_data = {}
@@ -22,58 +22,67 @@
2222
if file.endswith('.csv'):
2323
station_name = os.path.splitext(file)[0]
2424
file_path = os.path.join(root, file)
25-
25+
2626
# Read the station CSV file and store the data in the dictionary
2727
station_data[station_name] = pd.read_csv(file_path)
2828

2929
# Process the zip files
3030
with zipfile.ZipFile(input_zip_file, 'r') as input_zip_ref:
3131
file_list = input_zip_ref.namelist()
32-
with open('station_data.txt','w',encoding='ascii') as station_data_ref:
32+
with open('station_data.txt', 'w', encoding='ascii') as station_data_ref:
3333
keys = station_data.keys()
3434
for key in keys:
35-
station_data_ref.write('!'+key+'!'+"\n")
36-
35+
station_data_ref.write('!' + key + '!' + "\n")
36+
3737
for file_name in file_list:
3838
# Check if the file is a CSV file
3939
if not file_name.endswith('.csv'):
4040
continue
41-
41+
4242
# Extract the station name from the file name
4343
station_name = re.match(r'(.+)/data.csv', file_name).group(1)
44-
with open('station_name.txt','a',encoding='ascii') as station_name_ref:
45-
station_name_ref.write('!'+station_name+'!'+"\n")
44+
with open('station_name.txt', 'a', encoding='ascii') as station_name_ref:
45+
station_name_ref.write('!' + station_name + '!' + "\n")
4646
if station_name is None:
4747
continue
48-
49-
50-
48+
5149
# Check if the station has corresponding data in the station data dictionary
52-
if station_name not in station_data:
53-
continue
54-
55-
# Read the CSV data from the zip file
56-
with input_zip_ref.open(file_name) as input_csv_file_ref:
57-
binary_data = input_csv_file_ref.read()
58-
ascii_data = binary_data.decode('ascii')
59-
zip_data = pd.read_csv(StringIO(ascii_data))
60-
61-
# Merge the station data with the zip data
62-
merged_data = pd.concat([station_data[station_name], zip_data], ignore_index=True)
63-
merged_data.drop_duplicates(subset=['dt', 'SETT'], keep='last', inplace=True)
64-
65-
# Save the merged data as a new CSV file for the station
66-
station_folder = os.path.join(output_folder, station_name)
67-
os.makedirs(station_folder, exist_ok=True)
68-
output_csv_file = os.path.join(station_folder, 'merged_data.csv')
69-
merged_data.to_csv(output_csv_file, index=False)
70-
71-
# Create a zip file containing the merged data for each station
72-
output_zip_file = 'merged_data_by_station.zip'
73-
74-
with zipfile.ZipFile(output_zip_file, 'w', zipfile.ZIP_DEFLATED) as output_zip_file_ref:
75-
# Traverse the output folder and add each merged CSV file to the zip file
76-
for root, dirs, files in os.walk(output_folder):
77-
for file in files:
78-
file_path = os.path.join(root, file)
79-
output_zip_file_ref.write(file_path, os.path.relpath(file_path, output_folder))
50+
if station_name in station_data:
51+
# Read the CSV data from the zip file
52+
with input_zip_ref.open(file_name) as input_csv_file_ref:
53+
binary_data = input_csv_file_ref.read()
54+
ascii_data = binary_data.decode('ascii')
55+
zip_data = pd.read_csv(StringIO(ascii_data))
56+
57+
# Merge the station data with the zip data
58+
merged_data = pd.concat([station_data[station_name], zip_data], ignore_index=True)
59+
merged_data.drop_duplicates(subset=['dt', 'SETT'], keep='last', inplace=True)
60+
61+
# Save the merged data as a new CSV file for the station
62+
os.makedirs(output_folder, exist_ok=True)
63+
output_file_name = f"{station_name}.csv"
64+
output_csv_file = os.path.join(output_folder, output_file_name)
65+
merged_data.to_csv(output_csv_file, index=False)
66+
67+
# Remove the station entry from the station data dictionary to keep track of processed stations
68+
del station_data[station_name]
69+
else:
70+
# Read the CSV data from the zip file
71+
with input_zip_ref.open(file_name) as input_csv_file_ref:
72+
binary_data = input_csv_file_ref.read()
73+
ascii_data = binary_data.decode('ascii')
74+
zip_data = pd.read_csv(StringIO(ascii_data))
75+
76+
# Save the CSV data as a new CSV file in the output folder
77+
os.makedirs(output_folder, exist_ok=True)
78+
output_file_name = f"{station_name}.csv"
79+
output_csv_file = os.path.join(output_folder, output_file_name)
80+
zip_data.to_csv(output_csv_file, index=False)
81+
82+
# Add the files from the "station" folder that are not in "cme_futures_hist_by_station" to the "CME_DDIF" folder
83+
for station_name in station_data.keys():
84+
station_csv_data = station_data[station_name]
85+
output_file_name = f"{station_name}.csv"
86+
output_csv_file = os.path.join(output_folder, output_file_name)
87+
station_csv_data.to_csv(output_csv_file, index=False)
88+

CME_Rollup.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@
8585
station_df['dt'] = pd.to_datetime(station_df['dt']).dt.strftime("%Y-%m-%d")
8686
print(station_df)"""
8787
import ipdb;ipdb.set_trace()
88+
8889

8990

9091

CME_Zip.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -155,4 +155,5 @@
155155
data_csv_file_ref.write(line)
156156

157157
# write the file to the zip file
158-
output_zip_file_ref.write(data_csv_file, os.path.relpath(data_csv_file, output_folder))
158+
output_zip_file_ref.write(data_csv_file, os.path.relpath(data_csv_file, output_folder))
159+

0 commit comments

Comments
 (0)