From 29c06618083fa3747db62191bb5c554363cf767b Mon Sep 17 00:00:00 2001
From: Derek Holloway <derek@mistox.com>
Date: Tue, 17 Feb 2026 18:38:40 -0800
Subject: [PATCH] Build the AI predictor off the features so they are the same
 as the training model

---
 WebServer/AIPython/ai-predictor.py | 39 ++++++++++++++++++++++++++++++
 WebServer/AIPython/datapuller.py   | 35 ++++++---------------------
 2 files changed, 46 insertions(+), 28 deletions(-)
 create mode 100644 WebServer/AIPython/ai-predictor.py

diff --git a/WebServer/AIPython/ai-predictor.py b/WebServer/AIPython/ai-predictor.py
new file mode 100644
index 00000000..a1e07ee3
--- /dev/null
+++ b/WebServer/AIPython/ai-predictor.py
@@ -0,0 +1,39 @@
+import os
+import yfinance as yf
+import features
+import matplotlib
+matplotlib.use("Agg")
+
+def Predict():
+    # Resolve paths relative to this script (same data/ layout as datapuller.py)
+    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
+    DATA_DIR = os.path.join(SCRIPT_DIR, "data")
+    MODEL_PATH = os.path.join(DATA_DIR, "model.keras")
+
+    # Pull 2 months of current data to predict against | enough rows for volatility 20
+    df = yf.download("SPY", period="2mo", auto_adjust=True)
+    if not df.empty:
+        df = features.MakeFeatures(df, 1)
+        df = features.CleanDF(df)
+
+    # Drop the column we are predicting so it is not fed to the model as an input
+    df.drop('Volatility_5', axis=1, inplace=True)
+
+    # Lazy load this so it doesn't interfere with yfinance
+    from keras.models import load_model
+
+    # Load the model
+    reconstructed_model = load_model(MODEL_PATH)
+
+    # Verify it loaded correctly
+    reconstructed_model.summary()
+
+    # Predict
+    predictions = reconstructed_model.predict(df)
+
+    # 'predictions' will be a 2D array, flatten it if you want a simple list
+    flat_predictions = predictions.flatten()
+
+    print(f"Predicted Volatility: {flat_predictions}")
+    
+    return flat_predictions
\ No newline at end of file
diff --git a/WebServer/AIPython/datapuller.py b/WebServer/AIPython/datapuller.py
index a008389f..d701f3a5 100644
--- a/WebServer/AIPython/datapuller.py
+++ b/WebServer/AIPython/datapuller.py
@@ -1,9 +1,9 @@
 import os
 import yfinance as yf
 import pandas as pd
+import features
 
 def pull():
-
     # Get the CWD for pathing due to being called from C# now
     SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
     DATA_DIR = os.path.join(SCRIPT_DIR, "data")
@@ -19,40 +19,19 @@ def pull():
         print(f"Processing: {i} of {len(tickers)}")
         df = yf.download(symbol, period="max", auto_adjust=True)
         if not df.empty:
-            # Remove the ticker column
-            df.columns = df.columns.get_level_values(0)
-            
-            # Make sure Date is a number object
-            df = df.reset_index()
-            df['Date'] = pd.to_numeric(pd.to_datetime(df['Date']))
-
-            # Add the Symbol column for tracking | as an int 1 hot encoded
-            df['Symbol'] = i
-        
-            # Add feature Spread
-            df['Spread'] = abs( df['High'] - df['Low'] )
-
-            # Add feature for Returns
-            df['Return'] = df['Close'].pct_change()
-
-            # Add feature for volitility last 5
-            df['Volatility_5'] = df['Return'].transform(lambda x: x.rolling(5).std())
-
-            # Add feature for volitility last 20
-            df['Volatility_20'] = df['Return'].transform(lambda x: x.rolling(20).std())
-
+            # Use the shared features module so training and prediction data are built the same way
+            df = features.MakeFeatures(df, i)
+            # add to master list
             all_data.append(df)
 
     # Concatinate into a combined list and cache
     print("Processing data")
     final_df = pd.concat(all_data)
 
-    # Make date the index so it doesnt influence the training
-    final_df.set_index('Date', inplace=True)
-
-    # Drop rows with null values
-    final_df.dropna(inplace=True)
+    # Cleanup the data
+    final_df = features.CleanDF(final_df)
 
+    # Save to file
     print("Writing data to file")
     final_df.to_parquet(os.path.join(DATA_DIR, "stocks.parquet"))
     final_df.head(200).to_csv(os.path.join(DATA_DIR, "stocks.preview.csv"))
\ No newline at end of file