import os

import yfinance as yf
import pandas as pd


def pull() -> None:
    """Download price history for every listed symbol and cache it on disk.

    Reads the ticker list from ``data/stock_symbols.xlsx`` (column
    ``Symbol``) located next to this script, downloads the full
    auto-adjusted history of each ticker with ``yfinance``, stacks the
    per-ticker frames into one DataFrame and writes it to
    ``data/stocks.parquet``. The first 200 rows are also written to
    ``data/stocks.preview.csv`` for quick inspection.

    Raises:
        ValueError: If no symbol returned any data, so there is nothing
            to concatenate or write.
    """
    # Anchor every path to this script's directory instead of the current
    # working directory: the script is now called from C#.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(script_dir, "data")

    # Import the S&P 500 symbols. Header names are stripped because the
    # spreadsheet may carry stray whitespace around them.
    symbols = pd.read_excel(os.path.join(data_dir, "stock_symbols.xlsx"))
    symbols.columns = symbols.columns.str.strip()
    # Blank spreadsheet rows are read as NaN, which is not a usable ticker;
    # drop them and strip padding from the remaining values.
    tickers = symbols["Symbol"].dropna().astype(str).str.strip().tolist()

    # Scrape the data, one ticker at a time.
    total = len(tickers)
    all_data = []
    for i, symbol in enumerate(tickers):
        print(f"Processing: {i} of {total}")
        df = yf.download(symbol, period="max", auto_adjust=True)
        if df.empty:
            # Nothing came back for this symbol; skip it.
            continue
        # Keep only the first column level, which removes the ticker level
        # from the column labels.
        # NOTE(review): after this step nothing in the frame identifies the
        # symbol, so rows of different tickers are indistinguishable once
        # concatenated. Confirm downstream consumers do not need a symbol
        # column before relying on the combined file per ticker.
        df.columns = df.columns.get_level_values(0)
        all_data.append(df)

    # pd.concat raises an opaque "No objects to concatenate" on an empty
    # list; fail with the same exception type but an actionable message.
    if not all_data:
        raise ValueError(
            "No price data was downloaded for any symbol; nothing to write"
        )

    # Concatenate into a combined frame and cache it.
    print("Processing data")
    final_df = pd.concat(all_data)

    # Save to file: the full data set as parquet plus a small CSV preview.
    print("Writing data to file")
    final_df.to_parquet(os.path.join(data_dir, "stocks.parquet"))
    final_df.head(200).to_csv(os.path.join(data_dir, "stocks.preview.csv"))