From 65b63b719bc7a918e81e8be0fd379eed8456db33 Mon Sep 17 00:00:00 2001 From: Derek Holloway Date: Tue, 10 Mar 2026 17:26:11 -0700 Subject: [PATCH] Finalize the AI model --- WebServer/AIPython/aipredictor.py | 10 ++++--- WebServer/AIPython/datapuller.py | 4 ++- WebServer/AIPython/features.py | 46 +++++++++++++++++++------------ 3 files changed, 38 insertions(+), 22 deletions(-) diff --git a/WebServer/AIPython/aipredictor.py b/WebServer/AIPython/aipredictor.py index d8f3b226..cae6afa1 100644 --- a/WebServer/AIPython/aipredictor.py +++ b/WebServer/AIPython/aipredictor.py @@ -21,8 +21,10 @@ def Predict(): # Pull 1 month of current data to make prediction against | for volatility 20 df = yf.download(Symbol, period="2mo", auto_adjust=True) if not df.empty: - # Remove the ticker column + # Remove the horizontal ticker column df.columns = df.columns.get_level_values(0) + # Add in the Vertical ticker column + df['Ticker'] = Symbol # Make the feature set df = features.MakeFeatures(df) @@ -61,10 +63,10 @@ def Predict(): # Set the movement indicator movement_indicator = 0 - averagePrediction = np.mean(flat_predictions) - if (averagePrediction > 0.005): # as in 3% swing up + averagePrediction = np.mean(flat_predictions) + predictionTrend + if (averagePrediction > 0.3): # as in 3% swing up movement_indicator = 1 - elif (averagePrediction < -0.005): # as in 3% swing down + elif (averagePrediction < -0.3): # as in 3% swing down movement_indicator = -1 else: movement_indicator = 0 diff --git a/WebServer/AIPython/datapuller.py b/WebServer/AIPython/datapuller.py index 882ea3c9..e707fbfd 100644 --- a/WebServer/AIPython/datapuller.py +++ b/WebServer/AIPython/datapuller.py @@ -17,8 +17,10 @@ def pull(): for i, symbol in enumerate(tickers): df = yf.download(symbol, period="max", auto_adjust=True) if not df.empty: - # Remove the ticker column + # Remove the Horizontal ticker column df.columns = df.columns.get_level_values(0) + # Add in the Vertical ticker column + df['Ticker'] = symbol # 
add to master list all_data.append(df) diff --git a/WebServer/AIPython/features.py b/WebServer/AIPython/features.py index 4e9f9a04..981ecee7 100644 --- a/WebServer/AIPython/features.py +++ b/WebServer/AIPython/features.py @@ -5,6 +5,9 @@ def MakeFeatures(df): # Convert all F64 to F32 to save ram df = df.astype({col: 'float32' for col in df.select_dtypes('float64').columns}) + # Create Grouped columns by ticker + grouped = df.groupby('Ticker') + # Candle Wick's df['Spread'] = (df['High'] - df['Low']) / df['Close'] # in percentage of price candle_top = df[['Open', 'Close']].max(axis=1) @@ -12,45 +15,45 @@ def MakeFeatures(df): df['Upper_Shadow'] = (df['High'] - candle_top) / (df['High'] - df['Low']) # Is volume 2x higher than the 20-day average? - df['Vol_Intensity'] = df['Volume'] / df['Volume'].shift(1).rolling(20).mean() + df['Vol_Intensity'] = df['Volume'] / grouped['Volume'].transform(lambda x: x.rolling(20).mean()) # Volume Change - df['Volume_Chg'] = df['Volume'].pct_change() + df['Volume_Chg'] = grouped['Volume'].pct_change() # Moving Average Crossover (Golden/Death Cross logic) - df['Moving_Average_5'] = df['Close'].rolling(window=5).mean() - df['Moving_Average_20'] = df['Close'].rolling(window=20).mean() + df['Moving_Average_5'] = grouped['Close'].transform(lambda x: x.rolling(5).mean()) + df['Moving_Average_20'] = grouped['Close'].transform(lambda x: x.rolling(20).mean()) # if short term > long term (bullish), else 0 - df['Trend_Signal'] = (df['Moving_Average_5'] > df['Moving_Average_20']).astype(int) + df['Trend_Signal'] = (df['Moving_Average_5'] > df['Moving_Average_20']).astype(np.float32) # Distance from MA (How overextended are we?) df['Dist_From_MA20'] = (df['Close'] / df['Moving_Average_20']) - 1 # Bollinger Band Position (Where are we relative to volatility?) 
- std_20 = df['Close'].rolling(20).std() + std_20 = grouped['Close'].transform(lambda x: x.rolling(20).std()) upper_band = df['Moving_Average_20'] + (std_20 * 2) lower_band = df['Moving_Average_20'] - (std_20 * 2) - df['BB_Pos'] = (df['Close'] - lower_band) / (upper_band - lower_band) + df['BB_Pos'] = (df['Close'] - lower_band) / (upper_band - lower_band).replace(0, 1e-6) # Add feature for Returns - df['Return'] = df['Close'].pct_change() + df['Return'] = grouped['Close'].pct_change() # Log Returns (Better for AI than pct_change for statistical normality) - df['Log_Return'] = np.log(df['Close'] / df['Close'].shift(1)) + df['Log_Return'] = np.log(df['Close'] / grouped['Close'].shift(1)) # Add feature for volitility last 5 - df['Volatility_5'] = df['Return'].transform(lambda x: x.shift(1).rolling(5).std()) + df['Volatility_5'] = grouped['Return'].transform(lambda x: x.rolling(5).std()) # Add feature for volitility last 20 - df['Volatility_20'] = df['Return'].transform(lambda x: x.shift(1).rolling(20).std()) + df['Volatility_20'] = grouped['Return'].transform(lambda x: x.rolling(20).std()) # RSI (Relative Strength Index) - delta = df['Close'].diff() - gain = (delta.where(delta > 0, 0)).shift(1).rolling(window=14).mean() - loss = (-delta.where(delta < 0, 0)).shift(1).rolling(window=14).mean() + delta = grouped['Close'].diff() + gain = (delta.where(delta > 0, 0)).rolling(window=14).mean() + loss = (-delta.where(delta < 0, 0)).rolling(window=14).mean() rs = gain / loss df['RSI'] = 100 - (100 / (1 + rs)) # Return lagged for lag in range(1, 4): - df[f'Return_Lag_{lag}'] = df['Return'].shift(lag) - df[f'Vol_Lag_{lag}'] = df['Volume_Chg'].shift(lag) + df[f'Return_Lag_{lag}'] = grouped['Return'].shift(lag) + df[f'Vol_Lag_{lag}'] = grouped['Volume_Chg'].shift(lag) # This is our training metric of price difference 5 days ahead df['Target_Close'] = (np.log(df['Close'].shift(-5) / df['Close']) / df['Volatility_5']).clip(-10, 10) @@ -59,8 +62,17 @@ def MakeFeatures(df): with 
open("Target_Close_Average.txt", "w") as file: file.write(str(df["Target_Close"].mean())) + # Make a feature for the S&P500 average for the day + df['SP500_Market_Log_Return'] = df.groupby('Date')['Log_Return'].transform('mean') + # Relative Strength against the S&P500 + df['SP500_Relative_Performance'] = df['Log_Return'] - df['SP500_Market_Log_Return'] + # S&P500 market trend + daily_trend = df.groupby('Date')['SP500_Market_Log_Return'].first().rolling(window=20).mean() + daily_trend.name = 'SP500_Market_Trend_20' + df = df.merge(daily_trend, on='Date', how='left') + # Drop every column that is a raw price or an unscaled average - cols_to_drop = ['Open', 'High', 'Low', 'Volume', 'Close', 'Moving_Average_5', 'Moving_Average_20'] + cols_to_drop = ['Open', 'High', 'Low', 'Volume', 'Close', 'Moving_Average_5', 'Moving_Average_20', 'Ticker'] for col in cols_to_drop: if col in df.columns: df.drop(col, axis=1, inplace=True)