import os

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datapuller

# Hide CUDA devices so training runs on the CPU. This must be set before
# keras is imported, so it stays ahead of the keras import below.
os.environ["CUDA_VISIBLE_DEVICES"] = "-1"

from sklearn.model_selection import train_test_split
from keras import Sequential, layers, optimizers


def _plot_limits(true_values, predictions):
    """Return shared [low, high] axis limits covering both value sets."""
    combined = np.concatenate([np.ravel(true_values), np.ravel(predictions)])
    low = float(np.nanmin(combined))
    high = float(np.nanmax(combined))
    # Pad a little so extreme points are not drawn on the frame; fall back to
    # a fixed pad when every value is identical (zero-width range).
    pad = 0.05 * (high - low) or 1.0
    return [low - pad, high + pad]


def _save_prediction_plot(true_values, predictions, path):
    """Save a true-vs-predicted scatter plot with a y = x reference line."""
    fig, ax = plt.subplots()
    try:
        ax.set_aspect('equal')
        ax.scatter(true_values, predictions)
        ax.set_xlabel('True Values')
        ax.set_ylabel('Predictions')
        lims = _plot_limits(true_values, predictions)
        ax.set_xlim(lims)
        ax.set_ylim(lims)
        # Diagonal marks where a perfect prediction would fall.
        ax.plot(lims, lims)
        fig.savefig(path)
    finally:
        # Close explicitly so repeated calls do not accumulate open figures.
        plt.close(fig)


def TrainAI():
    """Train a small dense regression network and save it to disk.

    Steps, in order:
      1. Call ``datapuller.pull()``.
      2. Load ``data/stocks.parquet`` (relative to this script) and split it
         into features and the ``Target_Close_Tomorrow`` label column.
      3. Hold out 20% of the rows as a test set.
      4. Fit a normalization layer on the training features, then train a
         network with two 64-unit ReLU layers using Adam and mean absolute
         error loss.
      5. Save a true-vs-predicted scatter plot for the test set as
         ``data/predictions.png`` and print the test loss.
      6. Save the trained model as ``data/model.keras``.

    Returns:
        None.
    """
    # Pull new data
    datapuller.pull()

    # Resolve paths from this file's location rather than the CWD, because
    # the script is now called from C#.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    data_dir = os.path.join(script_dir, "data")

    # Load the dataset
    dataset = pd.read_parquet(os.path.join(data_dir, "stocks.parquet"))
    features = dataset.drop('Target_Close_Tomorrow', axis=1)
    labels = dataset['Target_Close_Tomorrow']

    # Show the datatypes
    print(dataset.dtypes)

    # Split out the test and train sets.
    # NOTE(review): train_test_split shuffles rows by default; if the rows
    # are time-ordered this mixes future rows into training -- confirm that
    # this is intended.
    train_features, test_features, train_labels, test_labels = train_test_split(
        features, labels, test_size=0.2
    )

    # Create a normalizer and fit it on the training features only
    normalizer = layers.Normalization(axis=-1)
    normalizer.adapt(np.array(train_features))

    # Normalization followed by two hidden layers and a single-value output
    model = Sequential([
        # Load the feature count dynamically
        layers.Input(shape=(train_features.shape[1],)),
        normalizer,
        layers.Dense(64, activation='relu'),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])

    # Configure the model
    model.compile(
        optimizer=optimizers.Adam(learning_rate=0.001),
        loss='mean_absolute_error',
    )

    # Show the summary before training the model
    model.summary()

    # Train the model
    model.fit(
        train_features,
        train_labels,
        batch_size=1024,
        epochs=100,
        # Show progress
        verbose=1,
        # Calculate validation results on 20% of the training data.
        validation_split=0.2,
    )

    # Predict on the held-out rows and persist the diagnostic plot, since
    # nothing displays the figure.
    test_predictions = model.predict(test_features).flatten()
    _save_prediction_plot(
        test_labels,
        test_predictions,
        os.path.join(data_dir, "predictions.png"),
    )

    # Report the held-out loss instead of discarding it.
    test_loss = model.evaluate(test_features, test_labels, verbose=0)
    print("Test mean absolute error:", test_loss)

    # Save the model
    model.save(os.path.join(data_dir, "model.keras"))