Wrap in a class and set function as static
This commit is contained in:
+39
-35
@@ -1,49 +1,53 @@
|
||||
import yfinance as yf
|
||||
import pandas as pd
|
||||
|
||||
# Import the S&P 500 symbols
|
||||
symbols = pd.read_excel("./data/stock_symbols.xlsx")
|
||||
symbols.columns = symbols.columns.str.strip()
|
||||
tickers = symbols['Symbol'].tolist()
|
||||
class DataPuller:
|
||||
|
||||
# Scrape the data
|
||||
all_data = []
|
||||
for i, symbol in enumerate(tickers): # Try first 20
|
||||
print(f"Processing: {i} of {len(tickers)}")
|
||||
df = yf.download(symbol, period="max")
|
||||
if not df.empty:
|
||||
# Remove the ticker column
|
||||
df.columns = df.columns.get_level_values(0)
|
||||
@staticmethod
|
||||
def pull():
|
||||
# Import the S&P 500 symbols
|
||||
symbols = pd.read_excel("./data/stock_symbols.xlsx")
|
||||
symbols.columns = symbols.columns.str.strip()
|
||||
tickers = symbols['Symbol'].tolist()
|
||||
|
||||
# Make sure Date is actually a Date Object
|
||||
df = df.reset_index()
|
||||
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
|
||||
df.set_index('Date', inplace=True)
|
||||
# Scrape the data
|
||||
all_data = []
|
||||
for i, symbol in enumerate(tickers): # Try first 20
|
||||
print(f"Processing: {i} of {len(tickers)}")
|
||||
df = yf.download(symbol, period="max", auto_adjust=True)
|
||||
if not df.empty:
|
||||
# Remove the ticker column
|
||||
df.columns = df.columns.get_level_values(0)
|
||||
|
||||
# Add the Symbol column for tracking
|
||||
df['Symbol'] = symbol
|
||||
# Make sure Date is actually a Date Object
|
||||
df = df.reset_index()
|
||||
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d")
|
||||
df.set_index('Date', inplace=True)
|
||||
|
||||
# Add feature Spread
|
||||
df['Spread'] = abs( df['High'] - df['Low'] )
|
||||
# Add the Symbol column for tracking
|
||||
df['Symbol'] = symbol
|
||||
|
||||
# Add feature for Returns
|
||||
df['Return'] = df['Close'].pct_change()
|
||||
# Add feature Spread
|
||||
df['Spread'] = abs( df['High'] - df['Low'] )
|
||||
|
||||
# Add feature for volatility over the last 5 rows
|
||||
df['Volatility_5'] = df['Return'].transform(lambda x: x.rolling(5).std())
|
||||
# Add feature for Returns
|
||||
df['Return'] = df['Close'].pct_change()
|
||||
|
||||
# Add feature for volatility over the last 20 rows
|
||||
df['Volatility_20'] = df['Return'].transform(lambda x: x.rolling(20).std())
|
||||
# Add feature for volatility over the last 5 rows
|
||||
df['Volatility_5'] = df['Return'].transform(lambda x: x.rolling(5).std())
|
||||
|
||||
all_data.append(df)
|
||||
# Add feature for volatility over the last 20 rows
|
||||
df['Volatility_20'] = df['Return'].transform(lambda x: x.rolling(20).std())
|
||||
|
||||
# Concatenate into a combined DataFrame and cache
|
||||
print("Processing data")
|
||||
final_df = pd.concat(all_data)
|
||||
all_data.append(df)
|
||||
|
||||
# Drop rows with null values
|
||||
final_df.dropna(inplace=True)
|
||||
# Concatenate into a combined DataFrame and cache
|
||||
print("Processing data")
|
||||
final_df = pd.concat(all_data)
|
||||
|
||||
print("Writing data to file")
|
||||
final_df.to_parquet("./data/stocks.parquet")
|
||||
final_df.head(200).to_csv("./data/stocks_preview.csv")
|
||||
# Drop rows with null values
|
||||
final_df.dropna(inplace=True)
|
||||
|
||||
print("Writing data to file")
|
||||
final_df.to_parquet("./data/stocks.parquet")
|
||||
final_df.head(200).to_csv("./data/stocks_preview.csv")
|
||||
Reference in New Issue
Block a user