From 29c06618083fa3747db62191bb5c554363cf767b Mon Sep 17 00:00:00 2001 From: Derek Holloway Date: Tue, 17 Feb 2026 18:38:40 -0800 Subject: [PATCH] Build the AI predictor off the features so they are the same as the training model --- WebServer/AIPython/ai-predictor.py | 39 ++++++++++++++++++++++++++++++ WebServer/AIPython/datapuller.py | 35 ++++++--------------------- 2 files changed, 46 insertions(+), 28 deletions(-) create mode 100644 WebServer/AIPython/ai-predictor.py diff --git a/WebServer/AIPython/ai-predictor.py b/WebServer/AIPython/ai-predictor.py new file mode 100644 index 00000000..a1e07ee3 --- /dev/null +++ b/WebServer/AIPython/ai-predictor.py @@ -0,0 +1,39 @@ +import os +import yfinance as yf +import features +import matplotlib +matplotlib.use("Agg") + +def Predict(): + # Define paths (consistent with your previous script) + SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) + DATA_DIR = os.path.join(SCRIPT_DIR, "data") + MODEL_PATH = os.path.join(DATA_DIR, "model.keras") + + # Pull 2 months of current data to make prediction against | for volatility 20 + df = yf.download("SPY", period="2mo", auto_adjust=True) + if not df.empty: + df = features.MakeFeatures(df, 1) + df = features.CleanDF(df) + + # Drop our predictor + df.drop('Volatility_5', axis=1, inplace=True) + + # Lazy load this so it doesn't interfere with yfinance + from keras.models import load_model + + # Load the model + reconstructed_model = load_model(MODEL_PATH) + + # Verify it loaded correctly + reconstructed_model.summary() + + # Predict + predictions = reconstructed_model.predict(df) + + # 'predictions' will be a 2D array, flatten it if you want a simple list + flat_predictions = predictions.flatten() + + print(f"Predicted Volatility: {flat_predictions}") + + return flat_predictions \ No newline at end of file diff --git a/WebServer/AIPython/datapuller.py b/WebServer/AIPython/datapuller.py index a008389f..d701f3a5 100644 --- a/WebServer/AIPython/datapuller.py +++ 
b/WebServer/AIPython/datapuller.py @@ -1,9 +1,9 @@ import os import yfinance as yf import pandas as pd +import features def pull(): - # Get the CWD for pathing due to being called from C# now SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__)) DATA_DIR = os.path.join(SCRIPT_DIR, "data") @@ -19,40 +19,19 @@ def pull(): print(f"Processing: {i} of {len(tickers)}") df = yf.download(symbol, period="max", auto_adjust=True) if not df.empty: - # Remove the ticker column - df.columns = df.columns.get_level_values(0) - - # Make sure Date is a number object - df = df.reset_index() - df['Date'] = pd.to_numeric(pd.to_datetime(df['Date'])) - - # Add the Symbol column for tracking | as an int 1 hot encoded - df['Symbol'] = i - - # Add feature Spread - df['Spread'] = abs( df['High'] - df['Low'] ) - - # Add feature for Returns - df['Return'] = df['Close'].pct_change() - - # Add feature for volitility last 5 - df['Volatility_5'] = df['Return'].transform(lambda x: x.rolling(5).std()) - - # Add feature for volitility last 20 - df['Volatility_20'] = df['Return'].transform(lambda x: x.rolling(20).std()) - + # Use external features to make sure loaded is the same + df = features.MakeFeatures(df, i) + # add to master list all_data.append(df) # Concatinate into a combined list and cache print("Processing data") final_df = pd.concat(all_data) - # Make date the index so it doesnt influence the training - final_df.set_index('Date', inplace=True) - - # Drop rows with null values - final_df.dropna(inplace=True) + # Cleanup the data + final_df = features.CleanDF(final_df) + # Save to file print("Writing data to file") final_df.to_parquet(os.path.join(DATA_DIR, "stocks.parquet")) final_df.head(200).to_csv(os.path.join(DATA_DIR, "stocks.preview.csv")) \ No newline at end of file