Make each AI task separate for faster testing

2026-02-26 21:39:41 -08:00
parent 79ee297e61
commit 7567496e1c
3 changed files with 13 additions and 32 deletions
@@ -1,5 +1,4 @@
 import os
-import json
 import joblib
 import numpy as np
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
@@ -17,8 +16,10 @@ def Predict(Symbol):
    # Pull 1 month of current data to make prediction against | for volatility 20
    df = yf.download(Symbol, period="2mo", auto_adjust=True)
    if not df.empty:
-        df = features.MakeFeatures(df, 1)
-        df = features.CleanDF(df)
+        # Remove the ticker column
+        df.columns = df.columns.get_level_values(0)
+        # Make the feature set
+        df = features.MakeFeatures(df)

    print(Symbol)

@@ -63,4 +64,4 @@ def Predict(Symbol):
    return movement_indicator

 if __name__ == "__main__":
-    Predict()
+    Predict("AAPL")
@@ -1,20 +1,13 @@
 import pandas as pd
-import numpy as np
-import matplotlib.pyplot as plt
-import datapuller
 import features
 import joblib
 import os
 os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
 from sklearn.model_selection import train_test_split
-from keras import Sequential, layers, optimizers, losses
+from keras import Sequential, layers, optimizers
 from keras.callbacks import ReduceLROnPlateau

-def TrainAI(include_pull):
-
-    if (include_pull):
-        # Pull New Data
-        datapuller.pull()
+def TrainAI():

    # Get the CWD for pathing due to being called from C#
    SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
@@ -23,6 +16,9 @@ def TrainAI(include_pull):
    # Load the dataset
    dataset = pd.read_parquet(os.path.join(DATA_DIR, "stocks.parquet"))

+    # Use external features to make sure loaded is the same
+    dataset = features.MakeFeatures(dataset)
+
    # Create the X, Y vareables
    X, Y, X_Scaler, Y_Scaler = features.Prepare(dataset)

@@ -75,18 +71,6 @@ def TrainAI(include_pull):
        callbacks=[reduce_lr]       # Reduce the learning_rate every run
    )

-    # Predict
-    test_predictions = dnn_model.predict(test_features).flatten()
-    a = plt.axes(aspect='equal')
-    plt.scatter(test_labels, test_predictions)
-    plt.xlabel('True Values')
-    plt.ylabel('Predictions')
-    lims = [0, 50]
-    plt.xlim(lims)
-    plt.ylim(lims)
-    _ = plt.plot(lims, lims)
-
-
    # Current Test Results: 0.3382711112499237
    test_results = dnn_model.evaluate(
        test_features, test_labels, verbose=0
@@ -97,6 +81,6 @@ def TrainAI(include_pull):
    dnn_model.save(os.path.join(DATA_DIR, "model.keras"))

 if __name__ == "__main__":
-    TrainAI(True)
+    TrainAI()

 # Last train Predicted Target_Close: [0.0022113274317234755, 0.0021446370519697666, 0.0022628342267125845, 0.002175702480599284, 0.0021452796645462513, 0.0020838389173150063, 0.0017336219316348433, 0.002210840117186308, 0.0021144403144717216, 0.0021278387866914272, 0.0021266420371830463, 0.002261851681396365, 0.002108299173414707, 0.002121902070939541, 0.0022294146474450827]
@@ -1,7 +1,6 @@
 import os
 import yfinance as yf
 import pandas as pd
-import features

 def pull():
    # Get the CWD for pathing due to being called from C# now
@@ -19,8 +18,8 @@ def pull():
        print(f"Processing: {i} of {len(tickers)}")
        df = yf.download(symbol, period="max", auto_adjust=True)
        if not df.empty:
-            # Use external featuers to make sure loaded is the same
-            df = features.MakeFeatures(df, i)
+            # Remove the ticker column
+            df.columns = df.columns.get_level_values(0)
            # add to master list
            all_data.append(df)

@@ -28,9 +27,6 @@ def pull():
    print("Processing data")
    final_df = pd.concat(all_data)

-    # Cleanup the data
-    final_df = features.CleanDF(final_df)
-
    # Save to file
    print("Writing data to file")
    final_df.to_parquet(os.path.join(DATA_DIR, "stocks.parquet"))