Files
AI-Stock-Trader/WebServer/AIPython/datapuller.py
T

37 lines
1.2 KiB
Python

import os
import yfinance as yf
import pandas as pd
import features
def pull():
    """Download price history for every listed symbol and cache it to disk.

    Reads the ``Symbol`` column of ``data/stock_symbols.xlsx`` (resolved
    relative to this script's own location), downloads each symbol's
    maximum-period, auto-adjusted history with yfinance, passes every
    non-empty result through ``features.MakeFeatures``, concatenates the
    results, cleans them with ``features.CleanDF`` and writes:

    * ``data/stocks.parquet``      -- the full combined dataset
    * ``data/stocks.preview.csv``  -- the first 200 rows, for inspection

    Progress is reported on stdout.

    Raises:
        ValueError: if no symbol returned any data. ``pd.concat`` would
            raise the same exception type on an empty list, but with a
            message that does not point at the real cause. Nothing is
            written to disk in that case.
    """
    # Resolve paths from this file's location rather than the working
    # directory: the script is launched from C#, so the CWD is not reliable.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(script_dir, "data")

    # Load the ticker symbols (the S&P 500 list, per the original comment).
    symbols = pd.read_excel(os.path.join(data_dir, "stock_symbols.xlsx"))
    # Header cells may carry stray whitespace; normalise before the lookup.
    symbols.columns = symbols.columns.str.strip()
    tickers = symbols['Symbol'].tolist()
    total = len(tickers)

    # Download each symbol and build its feature frame.
    all_data = []
    for i, symbol in enumerate(tickers):
        print(f"Processing: {i} of {total}")
        df = yf.download(symbol, period="max", auto_adjust=True)
        if df.empty:
            # Nothing came back for this symbol; skip it.
            continue
        # Use the shared feature builder so this data matches what is
        # produced at load time. `i` is the symbol's position in the
        # spreadsheet, so skipped symbols leave gaps in the index sequence.
        all_data.append(features.MakeFeatures(df, i))

    if not all_data:
        # Fail before touching the output files, with a message that names
        # the actual problem instead of pandas' "No objects to concatenate".
        raise ValueError(
            "No price data was downloaded for any symbol; nothing to write"
        )

    # Concatenate into a single combined frame.
    print("Processing data")
    final_df = pd.concat(all_data)
    # Clean up the combined data.
    final_df = features.CleanDF(final_df)

    # Save the full dataset plus a small human-readable preview.
    print("Writing data to file")
    final_df.to_parquet(os.path.join(data_dir, "stocks.parquet"))
    final_df.head(200).to_csv(os.path.join(data_dir, "stocks.preview.csv"))