Build the AI predictor's inputs with the shared features module so they are the same as the ones the model is trained on

This commit is contained in:
2026-02-17 18:38:40 -08:00
parent ad63ab0806
commit 29c0661808
2 changed files with 46 additions and 28 deletions
+7 -28
View File
@@ -1,9 +1,9 @@
import os
import yfinance as yf
import pandas as pd
import features
def pull():
# Build paths from this script's own directory rather than the CWD, since it is now called from C#
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
DATA_DIR = os.path.join(SCRIPT_DIR, "data")
@@ -19,40 +19,19 @@ def pull():
print(f"Processing: {i} of {len(tickers)}")
df = yf.download(symbol, period="max", auto_adjust=True)
if not df.empty:
# Remove the ticker column
df.columns = df.columns.get_level_values(0)
# Make sure Date is a number object
df = df.reset_index()
df['Date'] = pd.to_numeric(pd.to_datetime(df['Date']))
# Add the Symbol column for tracking | stored as a single int label per ticker (label encoding, not one-hot)
df['Symbol'] = i
# Add feature Spread
df['Spread'] = abs( df['High'] - df['Low'] )
# Add feature for Returns
df['Return'] = df['Close'].pct_change()
# Add feature for volatility: rolling std of Return over the last 5 rows
df['Volatility_5'] = df['Return'].transform(lambda x: x.rolling(5).std())
# Add feature for volatility: rolling std of Return over the last 20 rows
df['Volatility_20'] = df['Return'].transform(lambda x: x.rolling(20).std())
# Use the external features module so the loaded data gets the same features
df = features.MakeFeatures(df, i)
# add to master list
all_data.append(df)
# Concatenate into a single combined DataFrame and cache
print("Processing data")
final_df = pd.concat(all_data)
# Make Date the index so it doesn't influence the training
final_df.set_index('Date', inplace=True)
# Drop rows with null values
final_df.dropna(inplace=True)
# Cleanup the data
final_df = features.CleanDF(final_df)
# Save to file
print("Writing data to file")
final_df.to_parquet(os.path.join(DATA_DIR, "stocks.parquet"))
final_df.head(200).to_csv(os.path.join(DATA_DIR, "stocks.preview.csv"))