Change date to number and normalize | 1 hot encode symbol
This commit is contained in:
@@ -15,7 +15,7 @@ def pull():
|
|||||||
|
|
||||||
# Scrape the data
|
# Scrape the data
|
||||||
all_data = []
|
all_data = []
|
||||||
for i, symbol in enumerate(tickers): # Try first 20
|
for i, symbol in enumerate(tickers):
|
||||||
print(f"Processing: {i} of {len(tickers)}")
|
print(f"Processing: {i} of {len(tickers)}")
|
||||||
df = yf.download(symbol, period="max", auto_adjust=True)
|
df = yf.download(symbol, period="max", auto_adjust=True)
|
||||||
if not df.empty:
|
if not df.empty:
|
||||||
@@ -24,11 +24,10 @@ def pull():
|
|||||||
|
|
||||||
# Make sure Date is actually a Date Object
|
# Parse Date and convert it to an integer timestamp
|
||||||
df = df.reset_index()
|
df = df.reset_index()
|
||||||
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
|
df['Date'] = pd.to_numeric(pd.to_datetime(df['Date']))
|
||||||
df.set_index('Date', inplace=True)
|
|
||||||
|
|
||||||
# Add the Symbol column for tracking
|
# Add the Symbol column for tracking | stored as the ticker's integer index (label encoding, not one-hot)
|
||||||
df['Symbol'] = symbol
|
df['Symbol'] = i
|
||||||
|
|
||||||
# Add feature Spread
|
# Add feature Spread
|
||||||
df['Spread'] = abs( df['High'] - df['Low'] )
|
df['Spread'] = abs( df['High'] - df['Low'] )
|
||||||
@@ -48,6 +47,10 @@ def pull():
|
|||||||
print("Processing data")
|
print("Processing data")
|
||||||
final_df = pd.concat(all_data)
|
final_df = pd.concat(all_data)
|
||||||
|
|
||||||
|
# Normalize the Date
|
||||||
|
final_df['Date'] = (final_df['Date'] - final_df['Date'].min()) / (final_df['Date'].max() - final_df['Date'].min())
|
||||||
|
final_df.set_index('Date', inplace=True)
|
||||||
|
|
||||||
# Drop rows with null values
|
# Drop rows with null values
|
||||||
final_df.dropna(inplace=True)
|
final_df.dropna(inplace=True)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user