Scale the data before learning to normalize the output

2026-02-18 18:33:21 -08:00
parent a81e3a992d
commit 7a4fc2cda4
7 changed files with 240 additions and 212 deletions
@@ -1,3 +1,4 @@
+from sklearn.preprocessing import StandardScaler
 import pandas as pd
 import numpy as np

@@ -52,15 +53,27 @@ def MakeFeatures(df, i):
    # Training target: percent change of the close 5 rows ahead, relative to the row before it
    df['Target_Close'] = df['Close'].shift(-5).pct_change()

-    # for Up (> 0.5%), -1 for Down (< -0.5%), 0 for Flat
-    df['Target_Direction'] = np.where(df['Target_Close'] > 0.005, 1, np.where(df['Target_Close'] < -0.005, -1, 0))
-
    # Volume Change
    df['Volume_Chg'] = df['Volume'].pct_change()

    # Return new df with new features
    return df

+def Prepare(df):
+
+    # Split the frame: every column except 'Target_Close' is a feature, 'Target_Close' is the target
+    X = df.drop('Target_Close', axis=1)
+    Y = df['Target_Close']
+
+    # Standardize the features so they all share a common scale
+    feature_scaler = StandardScaler()
+    X_scaled = feature_scaler.fit_transform(X)
+
+    # Same for the target Y (reshaped to a single column because the scaler expects 2-D input)
+    target_scaler = StandardScaler()
+    y_scaled = target_scaler.fit_transform(Y.values.reshape(-1, 1))
+
+    return X_scaled, y_scaled, feature_scaler, target_scaler

 def CleanDF(df):
    # Make date the index so it doesn't influence the training
@@ -69,5 +82,8 @@ def CleanDF(df):
    # Drop rows with null values
    df.dropna(inplace=True)

+    # Replace +/-infinity in Volume_Chg with 0 -> non-finite values were breaking the training
+    df['Volume_Chg'] = df['Volume_Chg'].replace([np.inf, -np.inf], 0)
+
    # Return the new df that's cleaned
    return df