Hi all
I am just learning python. I am trying to visualize the correlation matrix of the S&P 500 stock data but I am getting a ValueError: could not convert string to float: '2000-01-03'.
Here is the coding:
I am just learning python. I am trying to visualize the correlation matrix of the S&P 500 stock data but I am getting a ValueError: could not convert string to float: '2000-01-03'.
Here is the coding:
import os
import pandas as pd
import requests
import pickle
import yfinance as yf
from pandas_datareader import data as pdr
import bs4 as bs
import datetime as dt
import matplotlib.pyplot as plt
from matplotlib import style
import numpy as np
# Function to save the S&P 500 tickers from the Wikipedia page
def save_sp500_tickers():
url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"
resp = requests.get(url)
soup = bs.BeautifulSoup(resp.text, "html.parser")
table = soup.find("table", {"id": "constituents"})
tickers = []
for row in table.find_all("tr")[1:]:
ticker = row.find_all("td")[0].text.strip()
ticker = str(ticker.replace('.', '-'))
tickers.append(ticker)
with open("sp500tickers.pickle", "wb") as f:
pickle.dump(tickers, f)
print(tickers)
return tickers
# Function to retrieve stock data from Yahoo Finance
def get_data_from_yahoo(reload_sp500=False):
if reload_sp500:
tickers = save_sp500_tickers()
else:
with open("sp500tickers.pickle", "rb") as f:
tickers = pickle.load(f)
if not os.path.exists('stock_dfs'):
os.makedirs('stock_dfs')
start = dt.datetime(2000, 1, 1)
end = dt.datetime(2016, 12, 31)
for ticker in tickers:
print(ticker)
if not os.path.exists(f'stock_dfs/{ticker}.csv'):
data = yf.download(ticker, start=start, end=end)
data.to_csv(f'stock_dfs/{ticker}.csv')
else:
print(f'Already have {ticker}')
# Function to compile the stock data into a single DataFrame
import csv
def compile_data():
with open("sp500tickers.pickle", "rb") as f:
tickers = pickle.load(f)
main_df = pd.DataFrame()
for count, ticker in enumerate(tickers):
with open(f'stock_dfs/{ticker.replace(".", "_")}.csv', 'r') as file:
reader = csv.reader(file)
next(reader) # Skip the header row
df = pd.DataFrame(reader, columns=['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'])
df['Date'] = pd.to_datetime(df['Date'])
df.set_index('Date', inplace=True)
df[['Open', 'High', 'Low', 'Close', 'Adj Close']] = df[['Open', 'High', 'Low', 'Close', 'Adj Close']].astype(float)
df['Volume'] = df['Volume'].astype(object)
df.drop(['Open', 'High', 'Low', 'Close'], axis=1, inplace=True)
df.rename(columns={'Adj Close': ticker, 'Volume': f'{ticker}_Volume'}, inplace=True)
if main_df.empty:
main_df = df
else:
main_df = main_df.join(df, how='outer')
if count % 10 == 0:
print(count)
print(main_df.head())
main_df.to_csv('sp500_joined_close.csv')
# Function to visualize the correlation matrix of the stock data
def visualize_data():
df = pd.read_csv('sp500_joined_close.csv')
df_corr = df.corr()
print(df_corr.head())
data = df_corr.values
fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
heatmap = ax.pcolor(data, cmap=plt.cm.RdYlGn)
fig.colorbar(heatmap)
ax.set_xticks(np.arange(data.shape[1]) + 0.5, minor=False)
ax.set_yticks(np.arange(data.shape[0]) + 0.5, minor=False)
ax.invert_yaxis()
ax.xaxis.tick_top()
column_labels = df_corr.columns
row_labels = df_corr.index
ax.set_xticklabels(column_labels, rotation=90)
ax.set_yticklabels(row_labels)
plt.tight_layout()
plt.show()
save_sp500_tickers()
get_data_from_yahoo()
compile_data()
visualize_data()Thank you for your helps!!!
Larz60+ write Dec-13-2023, 02:23 AM:
Please post all code, output and errors (it it's entirety) between their respective tags. Refer to BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future projects.
Please post all code, output and errors (it it's entirety) between their respective tags. Refer to BBCode help topic on how to post. Use the "Preview Post" button to make sure the code is presented as you expect before hitting the "Post Reply/Thread" button.
Tags have been added for you this time. Please use BBCode tags on future projects.
