Wrap in a class and set function as static

This commit is contained in:
2026-02-11 20:03:05 -08:00
parent cccf4650c2
commit 0fcdfc91bf
+20 -16
View File
@@ -1,16 +1,20 @@
import yfinance as yf
import pandas as pd
# Import the S&P 500 symbols
symbols = pd.read_excel("./data/stock_symbols.xlsx")
symbols.columns = symbols.columns.str.strip()
tickers = symbols['Symbol'].tolist()
class DataPuller:
# Scrape the data
all_data = []
for i, symbol in enumerate(tickers): # Try first 20
@staticmethod
def pull():
# Import the S&P 500 symbols
symbols = pd.read_excel("./data/stock_symbols.xlsx")
symbols.columns = symbols.columns.str.strip()
tickers = symbols['Symbol'].tolist()
# Scrape the data
all_data = []
for i, symbol in enumerate(tickers): # Try first 20
print(f"Processing: {i} of {len(tickers)}")
df = yf.download(symbol, period="max")
df = yf.download(symbol, period="max", auto_adjust=True)
if not df.empty:
# Remove the ticker column
df.columns = df.columns.get_level_values(0)
@@ -37,13 +41,13 @@ for i, symbol in enumerate(tickers): # Try first 20
all_data.append(df)
# Concatenate into a combined list and cache
print("Processing data")
final_df = pd.concat(all_data)
# Concatenate into a combined list and cache
print("Processing data")
final_df = pd.concat(all_data)
# Drop rows with null values
final_df.dropna(inplace=True)
# Drop rows with null values
final_df.dropna(inplace=True)
print("Writing data to file")
final_df.to_parquet("./data/stocks.parquet")
final_df.head(200).to_csv("./data/stocks_preview.csv")
print("Writing data to file")
final_df.to_parquet("./data/stocks.parquet")
final_df.head(200).to_csv("./data/stocks_preview.csv")