Change date to number and normalize | encode symbol as an integer index
This commit is contained in:
@@ -15,7 +15,7 @@ def pull():
|
||||
|
||||
# Scrape the data
|
||||
all_data = []
|
||||
for i, symbol in enumerate(tickers): # Try first 20
|
||||
for i, symbol in enumerate(tickers):
|
||||
print(f"Processing: {i} of {len(tickers)}")
|
||||
df = yf.download(symbol, period="max", auto_adjust=True)
|
||||
if not df.empty:
|
||||
@@ -24,11 +24,10 @@ def pull():
|
||||
|
||||
# Make sure Date is actually a Date Object
|
||||
df = df.reset_index()
|
||||
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
|
||||
df.set_index('Date', inplace=True)
|
||||
df['Date'] = pd.to_numeric(pd.to_datetime(df['Date']))
|
||||
|
||||
# Add the Symbol column for tracking
|
||||
df['Symbol'] = symbol
|
||||
# Add the Symbol column for tracking | encoded as the ticker's integer index (label encoding, not one-hot)
|
||||
df['Symbol'] = i
|
||||
|
||||
# Add feature Spread
|
||||
df['Spread'] = abs( df['High'] - df['Low'] )
|
||||
@@ -48,6 +47,10 @@ def pull():
|
||||
print("Processing data")
|
||||
final_df = pd.concat(all_data)
|
||||
|
||||
# Normalize the Date to the [0, 1] range (min-max scaling)
|
||||
final_df['Date'] = (final_df['Date'] - final_df['Date'].min()) / (final_df['Date'].max() - final_df['Date'].min())
|
||||
final_df.set_index('Date', inplace=True)
|
||||
|
||||
# Drop rows with null values
|
||||
final_df.dropna(inplace=True)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user