import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datapuller
import features
import joblib
import os

# Hide all CUDA devices from the process. This assignment sits ahead of the
# keras import below (presumably so the framework never sees a GPU); keep it
# there when reordering imports.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from sklearn.model_selection import train_test_split  # noqa: E402
from keras import Sequential, layers, optimizers, losses  # noqa: E402


def _save_prediction_plot(true_values, predictions, path):
    """Save a true-vs-predicted scatter plot to *path* and close the figure.

    Args:
        true_values: Array-like of ground-truth targets.
        predictions: Array-like of model outputs, one per target.
        path: Destination file for the rendered figure.
    """
    true_values = np.asarray(true_values, dtype=float).ravel()
    predictions = np.asarray(predictions, dtype=float).ravel()

    figure, axes = plt.subplots()
    try:
        axes.set_aspect("equal")
        axes.scatter(true_values, predictions)
        axes.set_xlabel("True Values")
        axes.set_ylabel("Predictions")

        # Take the axis window from the plotted data rather than a fixed
        # range, so the points are visible whatever scale the targets use.
        plotted = np.concatenate([true_values, predictions])
        plotted = plotted[np.isfinite(plotted)]
        if plotted.size:
            low, high = float(plotted.min()), float(plotted.max())
            if low == high:
                # Avoid a zero-width axis when every value is identical.
                low, high = low - 0.5, high + 0.5
            lims = [low, high]
        else:
            lims = [0, 50]  # Nothing finite to plot: keep the old fixed window.

        axes.set_xlim(lims)
        axes.set_ylim(lims)
        # Diagonal reference line: a perfect prediction lands on it.
        axes.plot(lims, lims)

        figure.savefig(path)
    finally:
        # Always release the figure so repeated training runs in the same
        # process do not accumulate open figures.
        plt.close(figure)


def TrainAI(include_pull):
    """Train the dense regression model on the stock dataset and save it.

    Reads ``data/stocks.parquet`` (relative to this file), builds features and
    targets with ``features.Prepare``, trains a small dense network, evaluates
    it on a 20% hold-out split, and writes these files into the same ``data``
    directory:

    * ``feature_scaler.pkl`` / ``target_scaler.pkl`` - the scalers returned by
      ``features.Prepare``.
    * ``predictions.png`` - true-vs-predicted scatter plot for the test split.
    * ``model.keras`` - the trained model.

    Args:
        include_pull: When truthy, call ``datapuller.pull()`` before loading
            the dataset.
    """
    if include_pull:
        # Pull new data
        datapuller.pull()

    # Resolve paths from this file rather than the CWD, because the script is
    # called from C#.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(script_dir, "data")

    # Load the dataset
    dataset = pd.read_parquet(os.path.join(data_dir, "stocks.parquet"))

    # Create the X, Y variables.
    # NOTE(review): Prepare receives the full dataset before the train/test
    # split below; if it fits the scalers on every row, the test split is not
    # completely unseen - confirm against features.Prepare.
    X, Y, X_Scaler, Y_Scaler = features.Prepare(dataset)

    # Save the scalers for future use
    joblib.dump(X_Scaler, os.path.join(data_dir, "feature_scaler.pkl"))
    joblib.dump(Y_Scaler, os.path.join(data_dir, "target_scaler.pkl"))

    # Show the datatypes
    print(dataset.dtypes)

    # Split out the test and train sets
    train_features, test_features, train_labels, test_labels = train_test_split(
        X, Y, test_size=0.2
    )

    # Create a normalizer to normalize the data, adapted on the training
    # features only.
    normalizer = layers.Normalization(axis=-1)
    normalizer.adapt(np.array(train_features))

    # Create the DNN
    dnn_model = Sequential([
        layers.Input(shape=(train_features.shape[1],)),  # Feature count is read dynamically
        normalizer,
        layers.Dense(64, activation='elu'),
        layers.Dense(64, activation='elu'),
        layers.Dense(1),
    ])

    # Configure the model
    dnn_model.compile(
        optimizer=optimizers.Adam(learning_rate=0.00001, clipvalue=1.0),
        loss=losses.Huber(),
    )

    # Show the summary before training the model
    dnn_model.summary()

    # Train the model
    dnn_model.fit(
        train_features,
        train_labels,
        batch_size=64,
        epochs=39,  # Tuned to the point before overfitting
        verbose=1,  # Show progress
        validation_split=0.2,  # Calculate validation results on 20% of the training data.
    )

    # Predict on the hold-out split and keep the scatter plot as a file; the
    # figure is never displayed, so saving it is the only way to inspect it.
    test_predictions = dnn_model.predict(test_features).flatten()
    _save_prediction_plot(
        test_labels,
        test_predictions,
        os.path.join(data_dir, "predictions.png"),
    )

    # Current Test Results: 1.221876300405711e-05
    test_results = dnn_model.evaluate(
        test_features,
        test_labels,
        verbose=0,
    )
    print(f"Test Results: {test_results}")

    # Save the model
    dnn_model.save(os.path.join(data_dir, "model.keras"))


if __name__ == "__main__":
    TrainAI(False)