Import python venv for stability

This commit is contained in:
2026-02-15 21:24:16 -08:00
parent 1343e93a59
commit 7d784705c9
4997 changed files with 1628270 additions and 0 deletions
@@ -0,0 +1,59 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from . import version
from .search import Search
from .lookup import Lookup
from .ticker import Ticker
from .calendars import Calendars
from .tickers import Tickers
from .multi import download
from .live import WebSocket, AsyncWebSocket
from .utils import enable_debug_mode
from .cache import set_tz_cache_location
from .domain.sector import Sector
from .domain.industry import Industry
from .domain.market import Market
from .config import YfConfig as config
from .screener.query import EquityQuery, FundQuery
from .screener.screener import screen, PREDEFINED_SCREENER_QUERIES
# Package metadata.
__version__ = version.version
__author__ = "Ran Aroussi"
import warnings
# Install a 'default'-action filter for DeprecationWarning restricted to
# modules whose name matches the regex '^yfinance'.
warnings.filterwarnings('default', category=DeprecationWarning, module='^yfinance')
# Names exported by `from yfinance import *`.
__all__ = ['download', 'Market', 'Search', 'Lookup', 'Ticker', 'Tickers', 'enable_debug_mode', 'set_tz_cache_location', 'Sector', 'Industry', 'WebSocket', 'AsyncWebSocket', 'Calendars']
# screener stuff:
__all__ += ['EquityQuery', 'FundQuery', 'screen', 'PREDEFINED_SCREENER_QUERIES']
# Config stuff:
# Sentinel default used by set_config() to tell "argument not passed"
# apart from any explicitly passed value.
_NOTSET=object()
def set_config(proxy=_NOTSET, retries=_NOTSET):
    """Deprecated shim that forwards settings to ``config.network``.

    Each argument that was actually passed triggers a DeprecationWarning and
    is then assigned to the attribute of the same name on ``config.network``.
    Arguments left at their sentinel default are ignored.
    """
    forwarded = (
        ("proxy", proxy,
         "Set proxy via new config control: yf.config.network.proxy = proxy"),
        ("retries", retries,
         "Set retries via new config control: yf.config.network.retries = retries"),
    )
    for attr_name, new_value, message in forwarded:
        if new_value is _NOTSET:
            continue
        # Warn first, then assign - same order as for each option originally.
        warnings.warn(message, DeprecationWarning)
        setattr(config.network, attr_name, new_value)
# Also export the config object and the deprecated set_config() helper.
__all__ += ['config', 'set_config']
@@ -0,0 +1,802 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import json as _json
from typing import Optional, Union
from urllib.parse import quote as urlencode
import numpy as np
import pandas as pd
from curl_cffi import requests
from . import utils, cache
from .const import _MIC_TO_YAHOO_SUFFIX
from .data import YfData
from .config import YfConfig
from .exceptions import YFDataException, YFEarningsDateMissing, YFRateLimitError
from .live import WebSocket
from .scrapers.analysis import Analysis
from .scrapers.fundamentals import Fundamentals
from .scrapers.holders import Holders
from .scrapers.quote import Quote, FastInfo
from .scrapers.history import PriceHistory
from .scrapers.funds import FundsData
from .const import _BASE_URL_, _ROOT_URL_, _QUERY1_URL_
from io import StringIO
from bs4 import BeautifulSoup
# Module-wide count of how many times TickerBase._get_ticker_tz() has fallen
# back to the ticker's info to find a timezone; that fallback is only
# attempted while this counter is below 2.
_tz_info_fetch_ctr = 0
class TickerBase:
    """Data-access methods for a single Yahoo Finance symbol.

    The quote, holders, analysis and fundamentals scrapers are created in
    ``__init__``; the price-history helper, funds data and fast info are
    created lazily on first use.
    """

    def __init__(self, ticker, session=None):
        """
        Initialize a Yahoo Finance Ticker object.

        Args:
            ticker (str | tuple[str, str]):
                Yahoo Finance symbol (e.g. "AAPL")
                or a tuple of (symbol, MIC) e.g. ('OR','XPAR')
                (MIC = market identifier code)
            session (requests.Session, optional):
                Custom requests session.

        Raises:
            ValueError: if a ticker tuple does not have exactly two items,
                the MIC code is unknown, the ticker is empty, or an ISIN
                resolves to an empty ticker.
        """
        if isinstance(ticker, tuple):
            if len(ticker) != 2:
                raise ValueError("Ticker tuple must be (symbol, mic_code)")
            base_symbol, mic_code = ticker
            # ticker = yahoo_ticker(base_symbol, mic_code)
            if mic_code.startswith('.'):
                mic_code = mic_code[1:]
            if mic_code.upper() not in _MIC_TO_YAHOO_SUFFIX:
                raise ValueError(f"Unknown MIC code: '{mic_code}'")
            sfx = _MIC_TO_YAHOO_SUFFIX[mic_code.upper()]
            if sfx != '':
                ticker = f'{base_symbol}.{sfx}'
            else:
                ticker = base_symbol

        self.ticker = ticker.upper()
        self.session = session or requests.Session(impersonate="chrome")
        self._tz = None

        self._isin = None
        self._news = []
        # (count, tab) of the request that produced self._news, so a cached
        # list is only reused for an identical request.
        self._news_request = None
        self._shares = None

        self._earnings_dates = {}

        self._earnings = None
        self._financials = None

        # raise an error if user tries to give empty ticker
        if self.ticker == "":
            raise ValueError("Empty ticker name")

        self._data: YfData = YfData(session=session)

        # accept isin as ticker
        if utils.is_isin(self.ticker):
            isin = self.ticker
            c = cache.get_isin_cache()
            self.ticker = c.lookup(isin)
            if not self.ticker:
                self.ticker = utils.get_ticker_by_isin(isin)
            if self.ticker == "":
                raise ValueError(f"Invalid ISIN number: {isin}")
            if self.ticker:
                c.store(isin, self.ticker)

        # self._price_history = PriceHistory(self._data, self.ticker)
        self._price_history = None  # lazy-load
        self._analysis = Analysis(self._data, self.ticker)
        self._holders = Holders(self._data, self.ticker)
        self._quote = Quote(self._data, self.ticker)
        self._fundamentals = Fundamentals(self._data, self.ticker)
        self._funds_data = None

        self._fast_info = None

        self._message_handler = None
        self.ws = None

    @utils.log_indent_decorator
    def history(self, *args, **kwargs) -> pd.DataFrame:
        """Forward all arguments to ``history()`` of the lazily created PriceHistory."""
        return self._lazy_load_price_history().history(*args, **kwargs)

    # ------------------------

    def _lazy_load_price_history(self):
        """Create the PriceHistory helper on first use and return it."""
        if self._price_history is None:
            self._price_history = PriceHistory(self._data, self.ticker, self._get_ticker_tz(timeout=10))
        return self._price_history

    def _get_ticker_tz(self, timeout):
        """Return the ticker's timezone name, or None if it cannot be found.

        Lookup order: value memoised on this instance, the tz cache, a chart
        request (``_fetch_ticker_tz``), and finally the ticker's info. Only a
        value accepted by ``utils.is_valid_timezone`` is cached and returned.
        """
        global _tz_info_fetch_ctr

        if self._tz is not None:
            return self._tz
        c = cache.get_tz_cache()
        tz = c.lookup(self.ticker)

        if tz and not utils.is_valid_timezone(tz):
            # Clear from cache and force re-fetch
            c.store(self.ticker, None)
            tz = None

        if tz is None:
            tz = self._fetch_ticker_tz(timeout)
            if tz is None:
                # _fetch_ticker_tz works in 99.999% of cases.
                # For rare fail get from info.
                if _tz_info_fetch_ctr < 2:
                    # ... but limit. If _fetch_ticker_tz() always
                    # failing then bigger problem.
                    _tz_info_fetch_ctr += 1
                    # Go through get_info(): this class defines no `info`
                    # attribute. Treat missing info as an empty mapping.
                    info = self.get_info() or {}
                    for k in ['exchangeTimezoneName', 'timeZoneFullName']:
                        if k in info:
                            tz = info[k]
                            break
            if utils.is_valid_timezone(tz):
                c.store(self.ticker, tz)
            else:
                tz = None

        self._tz = tz
        return tz

    @utils.log_indent_decorator
    def _fetch_ticker_tz(self, timeout):
        """Request one day of chart data and return its ``exchangeTimezoneName``.

        Returns None when the request fails, Yahoo reports an error, or the
        field is missing. YFRateLimitError always propagates; other
        exceptions propagate unless ``YfConfig.debug.hide_exceptions`` is set.
        """
        # Query Yahoo for fast price data just to get returned timezone
        logger = utils.get_yf_logger()

        params = {"range": "1d", "interval": "1d"}

        # Getting data from json
        url = f"{_BASE_URL_}/v8/finance/chart/{self.ticker}"

        try:
            data = self._data.cache_get(url=url, params=params, timeout=timeout)
            data = data.json()
        except YFRateLimitError:
            # Must propagate this
            raise
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            logger.error(f"Failed to get ticker '{self.ticker}' reason: {e}")
            return None
        else:
            error = data.get('chart', {}).get('error', None)
            if error:
                # explicit error from yahoo API
                logger.debug(f"Got error from yahoo api for ticker {self.ticker}, Error: {error}")
            else:
                try:
                    return data["chart"]["result"][0]["meta"]["exchangeTimezoneName"]
                except Exception as err:
                    if not YfConfig.debug.hide_exceptions:
                        raise
                    logger.error(f"Could not get exchangeTimezoneName for ticker '{self.ticker}' reason: {err}")
                    logger.debug("Got response: ")
                    logger.debug("-------------")
                    logger.debug(f" {data}")
                    logger.debug("-------------")
        return None

    def get_recommendations(self, as_dict=False):
        """
        Returns a DataFrame with the recommendations
        Columns: period  strongBuy  buy  hold  sell  strongSell
        """
        data = self._quote.recommendations
        if as_dict:
            return data.to_dict()
        return data

    def get_recommendations_summary(self, as_dict=False):
        """Alias of ``get_recommendations``."""
        return self.get_recommendations(as_dict=as_dict)

    def get_upgrades_downgrades(self, as_dict=False):
        """
        Returns a DataFrame with the recommendations changes (upgrades/downgrades)
        Index: date of grade
        Columns: firm toGrade fromGrade action
        """
        data = self._quote.upgrades_downgrades
        if as_dict:
            return data.to_dict()
        return data

    def get_calendar(self) -> dict:
        """Return the quote scraper's ``calendar``."""
        return self._quote.calendar

    def get_sec_filings(self) -> dict:
        """Return the quote scraper's ``sec_filings``."""
        return self._quote.sec_filings

    def get_major_holders(self, as_dict=False):
        """Return the holders scraper's ``major`` table (``to_dict()`` form if ``as_dict``)."""
        data = self._holders.major
        if as_dict:
            return data.to_dict()
        return data

    def get_institutional_holders(self, as_dict=False):
        """Return ``institutional`` holders, or None when no data is available."""
        data = self._holders.institutional
        if data is not None:
            if as_dict:
                return data.to_dict()
            return data

    def get_mutualfund_holders(self, as_dict=False):
        """Return ``mutualfund`` holders, or None when no data is available."""
        data = self._holders.mutualfund
        if data is not None:
            if as_dict:
                return data.to_dict()
            return data

    def get_insider_purchases(self, as_dict=False):
        """Return ``insider_purchases``, or None when no data is available."""
        data = self._holders.insider_purchases
        if data is not None:
            if as_dict:
                return data.to_dict()
            return data

    def get_insider_transactions(self, as_dict=False):
        """Return ``insider_transactions``, or None when no data is available."""
        data = self._holders.insider_transactions
        if data is not None:
            if as_dict:
                return data.to_dict()
            return data

    def get_insider_roster_holders(self, as_dict=False):
        """Return ``insider_roster``, or None when no data is available."""
        data = self._holders.insider_roster
        if data is not None:
            if as_dict:
                return data.to_dict()
            return data

    def get_info(self) -> dict:
        """Return the quote scraper's ``info``."""
        data = self._quote.info
        return data

    def get_fast_info(self):
        """Create the FastInfo helper on first use and return it."""
        if self._fast_info is None:
            self._fast_info = FastInfo(self)
        return self._fast_info

    def get_sustainability(self, as_dict=False):
        """Return the quote scraper's ``sustainability`` (``to_dict()`` form if ``as_dict``)."""
        data = self._quote.sustainability
        if as_dict:
            return data.to_dict()
        return data

    def get_analyst_price_targets(self) -> dict:
        """
        Keys:   current  low  high  mean  median
        """
        data = self._analysis.analyst_price_targets
        return data

    def get_earnings_estimate(self, as_dict=False):
        """
        Index:      0q  +1q  0y  +1y
        Columns:    numberOfAnalysts  avg  low  high  yearAgoEps  growth
        """
        data = self._analysis.earnings_estimate
        return data.to_dict() if as_dict else data

    def get_revenue_estimate(self, as_dict=False):
        """
        Index:      0q  +1q  0y  +1y
        Columns:    numberOfAnalysts  avg  low  high  yearAgoRevenue  growth
        """
        data = self._analysis.revenue_estimate
        return data.to_dict() if as_dict else data

    def get_earnings_history(self, as_dict=False):
        """
        Index:      pd.DatetimeIndex
        Columns:    epsEstimate  epsActual  epsDifference  surprisePercent
        """
        data = self._analysis.earnings_history
        return data.to_dict() if as_dict else data

    def get_eps_trend(self, as_dict=False):
        """
        Index:      0q  +1q  0y  +1y
        Columns:    current  7daysAgo  30daysAgo  60daysAgo  90daysAgo
        """
        data = self._analysis.eps_trend
        return data.to_dict() if as_dict else data

    def get_eps_revisions(self, as_dict=False):
        """
        Index:      0q  +1q  0y  +1y
        Columns:    upLast7days  upLast30days  downLast7days  downLast30days
        """
        data = self._analysis.eps_revisions
        return data.to_dict() if as_dict else data

    def get_growth_estimates(self, as_dict=False):
        """
        Index:      0q  +1q  0y  +1y +5y -5y
        Columns:    stock  industry  sector  index
        """
        data = self._analysis.growth_estimates
        return data.to_dict() if as_dict else data

    def get_earnings(self, as_dict=False, freq="yearly"):
        """
        :Parameters:
            as_dict: bool
                Return table as Python dict
                Default is False
            freq: str
                "yearly" or "quarterly" or "trailing"
                Default is "yearly"
        """
        if self._fundamentals.earnings is None:
            return None
        data = self._fundamentals.earnings[freq]
        if as_dict:
            dict_data = data.to_dict()
            # self._earnings starts out as None; fall back to an empty
            # mapping so the 'USD' default applies instead of a TypeError.
            earnings_meta = self._earnings or {}
            dict_data['financialCurrency'] = earnings_meta.get('financialCurrency', 'USD')
            return dict_data
        return data

    def get_income_stmt(self, as_dict=False, pretty=False, freq="yearly"):
        """
        :Parameters:
            as_dict: bool
                Return table as Python dict
                Default is False
            pretty: bool
                Format row names nicely for readability
                Default is False
            freq: str
                "yearly" or "quarterly" or "trailing"
                Default is "yearly"
        """
        data = self._fundamentals.financials.get_income_time_series(freq=freq)

        if pretty:
            # Copy first so the renamed index does not touch the source table.
            data = data.copy()
            data.index = utils.camel2title(data.index, sep=' ', acronyms=["EBIT", "EBITDA", "EPS", "NI"])
        if as_dict:
            return data.to_dict()
        return data

    def get_incomestmt(self, as_dict=False, pretty=False, freq="yearly"):
        """Alias of ``get_income_stmt``."""
        return self.get_income_stmt(as_dict, pretty, freq)

    def get_financials(self, as_dict=False, pretty=False, freq="yearly"):
        """Alias of ``get_income_stmt``."""
        return self.get_income_stmt(as_dict, pretty, freq)

    def get_balance_sheet(self, as_dict=False, pretty=False, freq="yearly"):
        """
        :Parameters:
            as_dict: bool
                Return table as Python dict
                Default is False
            pretty: bool
                Format row names nicely for readability
                Default is False
            freq: str
                "yearly" or "quarterly"
                Default is "yearly"
        """
        data = self._fundamentals.financials.get_balance_sheet_time_series(freq=freq)

        if pretty:
            data = data.copy()
            data.index = utils.camel2title(data.index, sep=' ', acronyms=["PPE"])
        if as_dict:
            return data.to_dict()
        return data

    def get_balancesheet(self, as_dict=False, pretty=False, freq="yearly"):
        """Alias of ``get_balance_sheet``."""
        return self.get_balance_sheet(as_dict, pretty, freq)

    def get_cash_flow(self, as_dict=False, pretty=False, freq="yearly") -> Union[pd.DataFrame, dict]:
        """
        :Parameters:
            as_dict: bool
                Return table as Python dict
                Default is False
            pretty: bool
                Format row names nicely for readability
                Default is False
            freq: str
                "yearly" or "quarterly"
                Default is "yearly"
        """
        data = self._fundamentals.financials.get_cash_flow_time_series(freq=freq)

        if pretty:
            data = data.copy()
            data.index = utils.camel2title(data.index, sep=' ', acronyms=["PPE"])
        if as_dict:
            return data.to_dict()
        return data

    def get_cashflow(self, as_dict=False, pretty=False, freq="yearly"):
        """Alias of ``get_cash_flow``."""
        return self.get_cash_flow(as_dict, pretty, freq)

    def get_dividends(self, period="max") -> pd.Series:
        """Delegate to ``PriceHistory.get_dividends``."""
        return self._lazy_load_price_history().get_dividends(period=period)

    def get_capital_gains(self, period="max") -> pd.Series:
        """Delegate to ``PriceHistory.get_capital_gains``."""
        return self._lazy_load_price_history().get_capital_gains(period=period)

    def get_splits(self, period="max") -> pd.Series:
        """Delegate to ``PriceHistory.get_splits``."""
        return self._lazy_load_price_history().get_splits(period=period)

    def get_actions(self, period="max") -> pd.Series:
        """Delegate to ``PriceHistory.get_actions``."""
        return self._lazy_load_price_history().get_actions(period=period)

    def get_shares(self, as_dict=False) -> Union[pd.DataFrame, dict]:
        """Return the fundamentals scraper's ``shares`` (``to_dict()`` form if ``as_dict``)."""
        data = self._fundamentals.shares
        if as_dict:
            return data.to_dict()
        return data

    @utils.log_indent_decorator
    def get_shares_full(self, start=None, end=None):
        """Fetch the share-count time series between ``start`` and ``end``.

        ``end`` defaults to now and ``start`` to 548 days before ``end``.
        Returns a Series sorted by date, or None when the date range is
        invalid, the request fails, or the response holds no share data.
        Request/parse exceptions propagate unless
        ``YfConfig.debug.hide_exceptions`` is set.
        """
        logger = utils.get_yf_logger()

        # Process dates
        tz = self._get_ticker_tz(timeout=10)
        dt_now = pd.Timestamp.now('UTC').tz_convert(tz)
        if start is not None:
            start = utils._parse_user_dt(start, tz)
        if end is not None:
            end = utils._parse_user_dt(end, tz)
        if end is None:
            end = dt_now
        if start is None:
            start = end - pd.Timedelta(days=548)  # 18 months
        if start >= end:
            logger.error("Start date must be before end")
            return None
        start = start.floor("D")
        end = end.ceil("D")

        # Fetch
        ts_url_base = f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self.ticker}?symbol={self.ticker}"
        shares_url = f"{ts_url_base}&period1={int(start.timestamp())}&period2={int(end.timestamp())}"
        try:
            json_data = self._data.cache_get(url=shares_url)
            json_data = json_data.json()
        except (_json.JSONDecodeError, requests.exceptions.RequestException):
            if not YfConfig.debug.hide_exceptions:
                raise
            logger.error(f"{self.ticker}: Yahoo web request for share count failed")
            return None
        try:
            fail = json_data["finance"]["error"]["code"] == "Bad Request"
        except (KeyError, TypeError):
            # TypeError covers an "error" (or "finance") entry that is None.
            fail = False
        if fail:
            if not YfConfig.debug.hide_exceptions:
                raise requests.exceptions.HTTPError("Yahoo web request for share count returned 'Bad Request'")
            logger.error(f"{self.ticker}: Yahoo web request for share count failed")
            return None

        shares_data = json_data["timeseries"]["result"]
        # An empty result list carries no share data either.
        if not shares_data or "shares_out" not in shares_data[0]:
            return None
        try:
            df = pd.Series(shares_data[0]["shares_out"], index=pd.to_datetime(shares_data[0]["timestamp"], unit="s"))
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            logger.error(f"{self.ticker}: Failed to parse shares count data: {e}")
            return None

        df.index = df.index.tz_localize(tz)
        df = df.sort_index()
        return df

    def get_isin(self) -> Optional[str]:
        """Look up the ISIN via the Business Insider suggest endpoint.

        Returns the memoised value when present, '-' when no ISIN can be
        determined, and None when the quote info is unavailable.
        """
        # *** experimental ***
        if self._isin is not None:
            return self._isin

        ticker = self.ticker.upper()

        if "-" in ticker or "^" in ticker:
            self._isin = '-'
            return self._isin

        q = ticker

        if self._quote.info is None:
            # Don't print error message cause self._quote.info will print one
            return None
        if "shortName" in self._quote.info:
            q = self._quote.info['shortName']

        url = f'https://markets.businessinsider.com/ajax/SearchController_Suggest?max_results=25&query={urlencode(q)}'
        data = self._data.cache_get(url=url).text

        search_str = f'"{ticker}|'
        if search_str not in data:
            if q.lower() in data.lower():
                # Fall back to the first entry whose leading field is empty.
                search_str = '"|'
                if search_str not in data:
                    self._isin = '-'
                    return self._isin
            else:
                self._isin = '-'
                return self._isin

        self._isin = data.split(search_str)[1].split('"')[0].split('|')[0]
        return self._isin

    def get_news(self, count=10, tab="news") -> list:
        """Return news articles for the ticker, with ad entries filtered out.

        Allowed options for tab: "news", "all", "press releases".

        A non-empty result is cached and reused only for a later call with
        the same ``count`` and ``tab``.

        Raises:
            ValueError: if ``tab`` is not one of the allowed options.
            YFDataException: if Yahoo returns no response or a
                "Will be right back" page.
        """
        request_key = (count, tab.lower())
        # Reuse the cache only for an identical request; otherwise a call
        # with a different count/tab would get the wrong articles.
        if self._news and self._news_request == request_key:
            return self._news

        logger = utils.get_yf_logger()

        tab_queryrefs = {
            "all": "newsAll",
            "news": "latestNews",
            "press releases": "pressRelease",
        }

        query_ref = tab_queryrefs.get(tab.lower())
        if not query_ref:
            raise ValueError(f"Invalid tab name '{tab}'. Choose from: {', '.join(tab_queryrefs.keys())}")

        url = f"{_ROOT_URL_}/xhr/ncp?queryRef={query_ref}&serviceKey=ncp_fin"
        payload = {
            "serviceConfig": {
                "snippetCount": count,
                "s": [self.ticker]
            }
        }

        data = self._data.post(url, body=payload)
        if data is None or "Will be right back" in data.text:
            raise YFDataException("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***")
        try:
            data = data.json()
        except _json.JSONDecodeError:
            if not YfConfig.debug.hide_exceptions:
                raise
            logger.error(f"{self.ticker}: Failed to retrieve the news and received faulty response instead.")
            data = {}

        news = data.get("data", {}).get("tickerStream", {}).get("stream", [])

        self._news = [article for article in news if not article.get('ad', [])]
        self._news_request = request_key
        return self._news

    def get_earnings_dates(self, limit = 12, offset = 0) -> Optional[pd.DataFrame]:
        """Return scraped earnings dates, cached per (limit, offset) request.

        Raises:
            ValueError: if ``limit`` exceeds 100 (or is not positive, raised
                by the scraper).
        """
        if limit > 100:
            raise ValueError("Yahoo caps limit at 100")

        # offset == 0 keeps the plain `limit` key; any other offset gets its
        # own key so it cannot be served the offset-0 result.
        cache_key = limit if offset == 0 else (limit, offset)
        if cache_key in self._earnings_dates:
            return self._earnings_dates[cache_key]

        df = self._get_earnings_dates_using_scrape(limit, offset)
        self._earnings_dates[cache_key] = df
        return df

    @utils.log_indent_decorator
    def _get_earnings_dates_using_scrape(self, limit = 12, offset = 0) -> Optional[pd.DataFrame]:
        """
        Uses YfData.cache_get() to scrape earnings data from YahooFinance.
        (https://finance.yahoo.com/calendar/earnings?symbol=INTC)

        Args:
            limit (int): Number of rows to extract (max=100)
            offset (int): if 0, search from future EPS estimates.
                          if 1, search from the most recent EPS.
                          if x, search from x'th recent EPS.

        Returns:
            pd.DataFrame in the following format.

                       EPS Estimate Reported EPS Surprise(%)
            Date
            2025-10-30         2.97            -           -
            2025-07-22         1.73         1.54      -10.88
            2025-05-06         2.63          2.7        2.57
            2025-02-06         2.09         2.42       16.06
            2024-10-31         1.92         1.55      -19.36
            ...                 ...          ...         ...
            2014-07-31         0.61         0.65        7.38
            2014-05-01         0.55         0.68       22.92
            2014-02-13         0.55         0.58        6.36
            2013-10-31         0.51         0.54        6.86
            2013-08-01         0.46          0.5        7.86

            None is returned (and an error logged) when the page has no table.

        Raises:
            ValueError: if ``limit`` is not in the range 1..100.
        """
        #####################################################
        # Define Constants
        #####################################################
        # Round the requested limit up to one of the page sizes 25/50/100.
        if limit > 0 and limit <= 25:
            size = 25
        elif limit > 25 and limit <= 50:
            size = 50
        elif limit > 50 and limit <= 100:
            size = 100
        else:
            raise ValueError("Please use limit <= 100")

        # Define the URL
        url = "https://finance.yahoo.com/calendar/earnings?symbol={}&offset={}&size={}".format(
            self.ticker, offset, size
        )

        #####################################################
        # Get data
        #####################################################
        response = self._data.cache_get(url)

        #####################################################
        # Response -> pd.DataFrame
        #####################################################
        # Parse the HTML content using BeautifulSoup
        soup = BeautifulSoup(response.text, "html.parser")
        # This page should have only one <table>
        table = soup.find("table")
        # If the table is found
        if table:
            # Get the HTML string of the table
            table_html = str(table)

            # Wrap the HTML string in a StringIO object
            html_stringio = StringIO(table_html)

            # Pass the StringIO object to pd.read_html()
            df = pd.read_html(html_stringio, na_values=['-'])[0]

            # Drop redundant columns
            df = df.drop(["Symbol", "Company"], axis=1)

            # Backwards compatibility
            df.rename(columns={'Surprise (%)': 'Surprise(%)'}, inplace=True)

            df = df.dropna(subset="Earnings Date")

            # Parse earnings date
            # - Pandas doesn't like EDT, EST
            df['Earnings Date'] = df['Earnings Date'].str.replace('EDT', 'America/New_York')
            df['Earnings Date'] = df['Earnings Date'].str.replace('EST', 'America/New_York')
            # - separate timezone string (last word)
            dt_parts = df['Earnings Date'].str.rsplit(' ', n=1, expand=True)
            dts = dt_parts[0]
            tzs = dt_parts[1]
            df['Earnings Date'] = pd.to_datetime(dts, format='%B %d, %Y at %I %p')
            # Keep df's own index on the new Series: after dropna() the index
            # can have gaps, and a default RangeIndex would misalign rows.
            df['Earnings Date'] = pd.Series(
                [dt.tz_localize(tz) for dt, tz in zip(df['Earnings Date'], tzs)],
                index=df.index,
            )
            df = df.set_index("Earnings Date")

        else:
            err_msg = "No earnings dates found, symbol may be delisted"
            logger = utils.get_yf_logger()
            logger.error(f'{self.ticker}: {err_msg}')
            return None
        return df

    @utils.log_indent_decorator
    def _get_earnings_dates_using_screener(self, limit=12) -> Optional[pd.DataFrame]:
        """
        Get earning dates (future and historic)

        In Summer 2025, Yahoo stopped updating the data at this endpoint.
        So reverting to scraping HTML.

        Args:
            limit (int): max amount of upcoming and recent earnings dates to return.
                Default value 12 should return next 4 quarters and last 8 quarters.
                Increase if more history is needed.
        Returns:
            pd.DataFrame
        """
        logger = utils.get_yf_logger()

        # Fetch data
        url = f"{_QUERY1_URL_}/v1/finance/visualization"
        params = {"lang": "en-US", "region": "US"}
        body = {
            "size": limit,
            "query": { "operator": "eq", "operands": ["ticker", self.ticker] },
            "sortField": "startdatetime",
            "sortType": "DESC",
            "entityIdType": "earnings",
            "includeFields": ["startdatetime", "timeZoneShortName", "epsestimate", "epsactual", "epssurprisepct", "eventtype"]
        }
        response = self._data.post(url, params=params, body=body)
        json_data = response.json()

        # Extract data
        columns = [row['label'] for row in json_data['finance']['result'][0]['documents'][0]['columns']]
        rows = json_data['finance']['result'][0]['documents'][0]['rows']
        df = pd.DataFrame(rows, columns=columns)

        if df.empty:
            _exception = YFEarningsDateMissing(self.ticker)
            err_msg = str(_exception)
            logger.error(f'{self.ticker}: {err_msg}')
            return None

        # Convert eventtype
        # - 1 = earnings call (manually confirmed)
        # - 2 = earnings report
        # - 11 = stockholders meeting (manually confirmed)
        df['Event Type'] = df['Event Type'].replace('^1$', 'Call', regex=True)
        df['Event Type'] = df['Event Type'].replace('^2$', 'Earnings', regex=True)
        df['Event Type'] = df['Event Type'].replace('^11$', 'Meeting', regex=True)

        # Calculate earnings date
        df['Earnings Date'] = pd.to_datetime(df['Event Start Date'])
        tz = self._get_ticker_tz(timeout=30)
        if df['Earnings Date'].dt.tz is None:
            df['Earnings Date'] = df['Earnings Date'].dt.tz_localize(tz)
        else:
            df['Earnings Date'] = df['Earnings Date'].dt.tz_convert(tz)

        # Convert types
        columns_to_update = ['Surprise (%)', 'EPS Estimate', 'Reported EPS']
        df[columns_to_update] = df[columns_to_update].astype('float64').replace(0.0, np.nan)

        # Format the dataframe
        df.drop(['Event Start Date', 'Timezone short name'], axis=1, inplace=True)
        df.set_index('Earnings Date', inplace=True)
        df.rename(columns={'Surprise (%)': 'Surprise(%)'}, inplace=True)  # Compatibility

        self._earnings_dates[limit] = df
        return df

    def get_history_metadata(self) -> dict:
        """Delegate to ``PriceHistory.get_history_metadata``."""
        return self._lazy_load_price_history().get_history_metadata()

    def get_funds_data(self) -> Optional[FundsData]:
        """Create the FundsData helper on first use and return it."""
        if not self._funds_data:
            self._funds_data = FundsData(self._data, self.ticker)
        return self._funds_data

    def live(self, message_handler=None, verbose=True):
        """Open a WebSocket, subscribe to this ticker and listen with ``message_handler``."""
        self._message_handler = message_handler

        self.ws = WebSocket(verbose=verbose)
        self.ws.subscribe(self.ticker)
        self.ws.listen(self._message_handler)
@@ -0,0 +1,639 @@
import peewee as _peewee
from threading import Lock
import os as _os
import platformdirs as _ad
import atexit as _atexit
import datetime as _dt
import pickle as _pkl
from .utils import get_yf_logger
# Single lock shared by the cache manager classes in this module; they hold
# it while creating their cache object for the first time.
_cache_init_lock = Lock()
# --------------
# TimeZone cache
# --------------
class _TzCacheException(Exception):
pass
class _TzCacheDummy:
"""Dummy cache to use if tz cache is disabled"""
def lookup(self, tkr):
return None
def store(self, tkr, tz):
pass
@property
def tz_db(self):
return None
class _TzCacheManager:
    """Creates the shared ``_TzCache`` on first request and hands it out."""

    # Shared cache object; stays None until get_tz_cache() is first called.
    _tz_cache = None

    @classmethod
    def get_tz_cache(cls):
        """Return the shared ``_TzCache``, creating it if necessary."""
        if cls._tz_cache is None:
            with _cache_init_lock:
                # Check again now that the lock is held: another thread may
                # have created the cache while this one was waiting, and it
                # must not be replaced by a second instance.
                if cls._tz_cache is None:
                    cls._initialise()
        return cls._tz_cache

    @classmethod
    def _initialise(cls, cache_dir=None):
        """Create the shared cache object (``cache_dir`` is accepted but not used)."""
        cls._tz_cache = _TzCache()
class _TzDBManager:
    """Owns the SQLite database handle used by the timezone cache."""

    _db = None
    _cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")

    @classmethod
    def get_database(cls):
        """Return the database handle, creating it on first use."""
        if cls._db is None:
            cls._initialise()
        return cls._db

    @classmethod
    def close_db(cls):
        """Close the database handle, if any, ignoring every error."""
        if cls._db is None:
            return
        try:
            cls._db.close()
        except Exception:
            # Must discard exceptions because Python trying to quit.
            pass

    @classmethod
    def _initialise(cls, cache_dir=None):
        """Prepare the cache folder and create the SQLite database object.

        Raises:
            _TzCacheException: if the folder cannot be created, or exists
                but is not both readable and writable.
        """
        if cache_dir is not None:
            cls._cache_dir = cache_dir

        if _os.path.isdir(cls._cache_dir):
            usable = _os.access(cls._cache_dir, _os.R_OK) and _os.access(cls._cache_dir, _os.W_OK)
            if not usable:
                raise _TzCacheException(f"Cannot read and write in TzCache folder: '{cls._cache_dir}'")
        else:
            try:
                _os.makedirs(cls._cache_dir)
            except OSError as err:
                raise _TzCacheException(f"Error creating TzCache folder: '{cls._cache_dir}' reason: {err}")

        db_path = _os.path.join(cls._cache_dir, 'tkr-tz.db')
        db_pragmas = {'journal_mode': 'wal', 'cache_size': -64}
        cls._db = _peewee.SqliteDatabase(db_path, pragmas=db_pragmas)

        # Remove the CSV file of the old cache format if it is still around.
        legacy_csv = _os.path.join(cls._cache_dir, "tkr-tz.csv")
        if _os.path.isfile(legacy_csv):
            _os.remove(legacy_csv)

    @classmethod
    def set_location(cls, new_cache_dir):
        """Close and drop any open handle, then point at ``new_cache_dir``."""
        current = cls._db
        if current is not None:
            current.close()
            cls._db = None
        cls._cache_dir = new_cache_dir

    @classmethod
    def get_location(cls):
        """Return the folder currently configured for the database."""
        return cls._cache_dir
# close DB when Python exits
_atexit.register(_TzDBManager.close_db)
# Placeholder database for the _TZ_KV model; it is bound to the real
# database by _TzCache.initialise().
tz_db_proxy = _peewee.Proxy()
class _TZ_KV(_peewee.Model):
    # Key/value table behind _TzCache: rows are looked up, inserted, updated
    # and deleted by `key`; `value` may be NULL.
    key = _peewee.CharField(primary_key=True)
    value = _peewee.CharField(null=True)

    class Meta:
        # Bound late: _TzCache.initialise() calls tz_db_proxy.initialize(db).
        database = tz_db_proxy
        # _TzCache.initialise() sets this to False and retries if creating
        # the table fails with an OperationalError mentioning 'WITHOUT'.
        without_rowid = True
class _TzCache:
    """Key/value timezone cache stored in the ``_TZ_KV`` table.

    ``initialised`` is -1 before the first attempt, 0 after a failed attempt
    and 1 after success. ``dummy`` is set when the database could not be
    obtained, after which lookups miss and stores are ignored.
    """

    def __init__(self):
        self.initialised = -1
        self.db = None
        self.dummy = False

    def get_db(self):
        """Return the database handle, or None (and go dummy) if unavailable."""
        if self.db is None:
            try:
                self.db = _TzDBManager.get_database()
            except _TzCacheException as err:
                get_yf_logger().info(f"Failed to create TzCache, reason: {err}. "
                                     "TzCache will not be used. "
                                     "Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
                self.dummy = True
                return None
        return self.db

    def initialise(self):
        """Connect, bind the model proxy and create the table (first call only)."""
        if self.initialised != -1:
            return

        database = self.get_db()
        if database is None:
            self.initialised = 0  # failure
            return

        database.connect()
        tz_db_proxy.initialize(database)
        try:
            database.create_tables([_TZ_KV])
        except _peewee.OperationalError as e:
            if 'WITHOUT' not in str(e):
                raise
            # Retry as an ordinary rowid table.
            _TZ_KV._meta.without_rowid = False
            database.create_tables([_TZ_KV])
        self.initialised = 1  # success

    def _ready(self):
        """Initialise on demand and report whether the table can be used."""
        if self.dummy:
            return False
        if self.initialised == -1:
            self.initialise()
        return self.initialised != 0

    def lookup(self, key):
        """Return the value stored for ``key``, or None when absent or unusable."""
        if not self._ready():
            return None
        try:
            row = _TZ_KV.get(_TZ_KV.key == key)
        except _TZ_KV.DoesNotExist:
            return None
        return row.value

    def store(self, key, value):
        """Store ``value`` under ``key``; a ``value`` of None deletes the entry."""
        if not self._ready():
            return

        database = self.get_db()
        if database is None:
            return

        try:
            if value is None:
                _TZ_KV.delete().where(_TZ_KV.key == key).execute()
                return
            with database.atomic():
                _TZ_KV.insert(key=key, value=value).execute()
        except _peewee.IntegrityError:
            # Integrity error means the key already exists. Try updating the key.
            previous = self.lookup(key)
            if previous != value:
                get_yf_logger().debug(f"Value for key {key} changed from {previous} to {value}.")
                with database.atomic():
                    _TZ_KV.update(value=value).where(_TZ_KV.key == key).execute()
def get_tz_cache():
    """Return the timezone cache object provided by ``_TzCacheManager``."""
    tz_cache = _TzCacheManager.get_tz_cache()
    return tz_cache
# --------------
# Cookie cache
# --------------
class _CookieCacheException(Exception):
pass
class _CookieCacheDummy:
"""Dummy cache to use if Cookie cache is disabled"""
def lookup(self, tkr):
return None
def store(self, tkr, Cookie):
pass
@property
def Cookie_db(self):
return None
class _CookieCacheManager:
    """Creates the shared ``_CookieCache`` on first request and hands it out."""

    # Shared cache object; stays None until get_cookie_cache() is first called.
    _Cookie_cache = None

    @classmethod
    def get_cookie_cache(cls):
        """Return the shared ``_CookieCache``, creating it if necessary."""
        if cls._Cookie_cache is None:
            with _cache_init_lock:
                # Check again now that the lock is held: another thread may
                # have created the cache while this one was waiting, and it
                # must not be replaced by a second instance.
                if cls._Cookie_cache is None:
                    cls._initialise()
        return cls._Cookie_cache

    @classmethod
    def _initialise(cls, cache_dir=None):
        """Create the shared cache object (``cache_dir`` is accepted but not used)."""
        cls._Cookie_cache = _CookieCache()
class _CookieDBManager:
    """Owns the SQLite database handle used by the cookie cache."""

    _db = None
    _cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")

    @classmethod
    def get_database(cls):
        """Return the database handle, creating it on first use."""
        if cls._db is None:
            cls._initialise()
        return cls._db

    @classmethod
    def close_db(cls):
        """Close the database handle, if any, ignoring every error."""
        if cls._db is None:
            return
        try:
            cls._db.close()
        except Exception:
            # Must discard exceptions because Python trying to quit.
            pass

    @classmethod
    def _initialise(cls, cache_dir=None):
        """Prepare the cache folder and create the SQLite database object.

        Raises:
            _CookieCacheException: if the folder cannot be created, or
                exists but is not both readable and writable.
        """
        if cache_dir is not None:
            cls._cache_dir = cache_dir

        if _os.path.isdir(cls._cache_dir):
            usable = _os.access(cls._cache_dir, _os.R_OK) and _os.access(cls._cache_dir, _os.W_OK)
            if not usable:
                raise _CookieCacheException(f"Cannot read and write in CookieCache folder: '{cls._cache_dir}'")
        else:
            try:
                _os.makedirs(cls._cache_dir)
            except OSError as err:
                raise _CookieCacheException(f"Error creating CookieCache folder: '{cls._cache_dir}' reason: {err}")

        db_path = _os.path.join(cls._cache_dir, 'cookies.db')
        db_pragmas = {'journal_mode': 'wal', 'cache_size': -64}
        cls._db = _peewee.SqliteDatabase(db_path, pragmas=db_pragmas)

    @classmethod
    def set_location(cls, new_cache_dir):
        """Close and drop any open handle, then point at ``new_cache_dir``."""
        current = cls._db
        if current is not None:
            current.close()
            cls._db = None
        cls._cache_dir = new_cache_dir

    @classmethod
    def get_location(cls):
        """Return the folder currently configured for the database."""
        return cls._cache_dir
# close DB when Python exits
_atexit.register(_CookieDBManager.close_db)
# Placeholder database for the _CookieSchema model; it is bound to the real
# database by _CookieCache.initialise().
Cookie_db_proxy = _peewee.Proxy()
class ISODateTimeField(_peewee.DateTimeField):
    """DateTimeField that writes datetimes as ISO-8601 strings and reads them back."""

    # Ensure Python datetime is read & written correctly for sqlite,
    # because user discovered peewee allowed an invalid datetime
    # to get written.

    def db_value(self, value):
        """Serialise a datetime with ``isoformat()``; defer everything else to peewee."""
        is_datetime = value and isinstance(value, _dt.datetime)
        if not is_datetime:
            return super().db_value(value)
        return value.isoformat()

    def python_value(self, value):
        """Parse strings containing 'T' with ``fromisoformat()``; defer the rest to peewee."""
        is_iso_text = value and isinstance(value, str) and 'T' in value
        if not is_iso_text:
            return super().python_value(value)
        return _dt.datetime.fromisoformat(value)
class _CookieSchema(_peewee.Model):
    # One row per strategy; _CookieCache reads, deletes and inserts rows by
    # this key.
    strategy = _peewee.CharField(primary_key=True)
    # Defaults to the time the row is created; _CookieCache.lookup() derives
    # the cookie's age from it.
    fetch_date = ISODateTimeField(default=_dt.datetime.now)
    # Which cookie type depends on strategy
    # Holds the pickled cookie object written by _CookieCache.store().
    cookie_bytes = _peewee.BlobField()

    class Meta:
        # Bound late: _CookieCache.initialise() calls Cookie_db_proxy.initialize(db).
        database = Cookie_db_proxy
        # _CookieCache.initialise() sets this to False and retries if creating
        # the table fails with an OperationalError mentioning 'WITHOUT'.
        without_rowid = True
class _CookieCache:
    """Per-strategy cookie cache stored in the ``_CookieSchema`` table.

    ``initialised`` is -1 before the first attempt, 0 after a failed attempt
    and 1 after success. ``dummy`` is set when the database could not be
    obtained, after which lookups miss and stores are ignored.
    """

    def __init__(self):
        self.initialised = -1
        self.db = None
        self.dummy = False

    def get_db(self):
        """Return the database handle, or None (and go dummy) if unavailable."""
        if self.db is not None:
            return self.db

        try:
            self.db = _CookieDBManager.get_database()
        except _CookieCacheException as err:
            get_yf_logger().info(f"Failed to create CookieCache, reason: {err}. "
                                 "CookieCache will not be used. "
                                 "Tip: You can direct cache to use a different location with 'set_tz_cache_location(mylocation)'")
            self.dummy = True
            return None
        return self.db

    def initialise(self):
        """Connect, bind the model proxy and create the table (first call only)."""
        if self.initialised != -1:
            return

        db = self.get_db()
        if db is None:
            self.initialised = 0  # failure
            return

        db.connect()
        Cookie_db_proxy.initialize(db)
        try:
            db.create_tables([_CookieSchema])
        except _peewee.OperationalError as e:
            if 'WITHOUT' in str(e):
                # Retry as an ordinary rowid table.
                _CookieSchema._meta.without_rowid = False
                db.create_tables([_CookieSchema])
            else:
                raise
        self.initialised = 1  # success

    def lookup(self, strategy):
        """Return ``{'cookie': ..., 'age': timedelta}`` for ``strategy``, or None.

        None is returned when nothing is stored or the cache is unusable.
        """
        if self.dummy:
            return None

        if self.initialised == -1:
            self.initialise()

        if self.initialised == 0:  # failure
            return None

        try:
            data = _CookieSchema.get(_CookieSchema.strategy == strategy)
        except _CookieSchema.DoesNotExist:
            return None
        # NOTE(security): pickle.loads executes arbitrary code from a crafted
        # payload. The bytes come from the cookie database file, so this is
        # only as safe as that file - review if the cache location can be
        # written by an untrusted party.
        cookie = _pkl.loads(data.cookie_bytes)
        return {'cookie': cookie, 'age': _dt.datetime.now() - data.fetch_date}

    def store(self, strategy, cookie):
        """Replace the cookie stored for ``strategy``; None just deletes it.

        The delete and the insert run in one transaction, so a failing
        insert leaves the previously stored cookie in place.
        """
        if self.dummy:
            return

        if self.initialised == -1:
            self.initialise()

        if self.initialised == 0:  # failure
            return

        db = self.get_db()
        if db is None:
            return

        # Pickle before touching the table so a serialisation error cannot
        # remove the existing row.
        cookie_pkl = None
        if cookie is not None:
            cookie_pkl = _pkl.dumps(cookie, _pkl.HIGHEST_PROTOCOL)

        with db.atomic():
            _CookieSchema.delete().where(_CookieSchema.strategy == strategy).execute()
            if cookie_pkl is not None:
                _CookieSchema.insert(strategy=strategy, cookie_bytes=cookie_pkl).execute()
def get_cookie_cache():
    """Return the cookie cache handed out by ``_CookieCacheManager``."""
    cache = _CookieCacheManager.get_cookie_cache()
    return cache
# --------------
# ISIN cache
# --------------
class _ISINCacheException(Exception):
pass
class _ISINCacheDummy:
"""Dummy cache to use if isin cache is disabled"""
def lookup(self, isin):
return None
def store(self, isin, tkr):
pass
@property
def tz_db(self):
return None
class _ISINCacheManager:
    """Owns the shared ``_ISINCache`` instance and creates it on first request."""

    _isin_cache = None

    @classmethod
    def get_isin_cache(cls):
        """Return the shared ISIN cache, creating it under ``_cache_init_lock`` if needed."""
        if cls._isin_cache is None:
            with _cache_init_lock:
                # Re-check while holding the lock: another thread may have
                # created the cache while this one was waiting, and creating
                # it again would replace the instance already handed out.
                if cls._isin_cache is None:
                    cls._initialise()
        return cls._isin_cache

    @classmethod
    def _initialise(cls, cache_dir=None):
        """Create the cache instance. ``cache_dir`` is accepted but not used here."""
        cls._isin_cache = _ISINCache()
class _ISINDBManager:
    """Creates, hands out and closes the sqlite database behind the ISIN cache."""

    _db = None
    _cache_dir = _os.path.join(_ad.user_cache_dir(), "py-yfinance")

    @classmethod
    def get_database(cls):
        """Return the database handle, creating it on first call.

        :raises _ISINCacheException: if the cache folder cannot be created or accessed
        """
        if cls._db is None:
            cls._initialise()
        return cls._db

    @classmethod
    def close_db(cls):
        """Close the database if one was created; registered with atexit."""
        if cls._db is not None:
            try:
                cls._db.close()
            except Exception:
                # Must discard exceptions because Python trying to quit.
                pass

    @classmethod
    def _initialise(cls, cache_dir=None):
        """Ensure the cache folder is usable and create the database handle.

        :param cache_dir: optional folder that replaces the configured one
        :raises _ISINCacheException: if the folder cannot be created, or exists
            but is not both readable and writable
        """
        if cache_dir is not None:
            cls._cache_dir = cache_dir

        if not _os.path.isdir(cls._cache_dir):
            try:
                # exist_ok: the folder may appear between the isdir() check and
                # this call (e.g. another cache using the same folder creates it);
                # that must not disable the ISIN cache.
                _os.makedirs(cls._cache_dir, exist_ok=True)
            except OSError as err:
                raise _ISINCacheException(f"Error creating ISINCache folder: '{cls._cache_dir}' reason: {err}") from err
        elif not (_os.access(cls._cache_dir, _os.R_OK) and _os.access(cls._cache_dir, _os.W_OK)):
            raise _ISINCacheException(f"Cannot read and write in ISINCache folder: '{cls._cache_dir}'")

        cls._db = _peewee.SqliteDatabase(
            _os.path.join(cls._cache_dir, 'isin-tkr.db'),
            pragmas={'journal_mode': 'wal', 'cache_size': -64}
        )

    @classmethod
    def set_location(cls, new_cache_dir):
        """Close any open database and record a new cache folder for the next initialisation."""
        if cls._db is not None:
            cls._db.close()
            cls._db = None
        cls._cache_dir = new_cache_dir

    @classmethod
    def get_location(cls):
        """Return the currently configured cache folder."""
        return cls._cache_dir
# Close the ISIN DB when Python exits.
_atexit.register(_ISINDBManager.close_db)
# Placeholder database for the ISIN model; it is bound to the real
# database by _ISINCache.initialise() via isin_db_proxy.initialize(db).
isin_db_proxy = _peewee.Proxy()
class _ISIN_KV(_peewee.Model):
    """Peewee model for the ISIN cache table: a key -> value mapping with a creation time."""

    # Lookup key; primary key, so each key maps to at most one value.
    key = _peewee.CharField(primary_key=True)
    # Cached value for the key; nullable.
    value = _peewee.CharField(null=True)
    # Defaults to row creation time; _ISINCache.store() refreshes it on update
    # and uses it to expire rows older than one week.
    created_at = _peewee.DateTimeField(default=_dt.datetime.now)

    class Meta:
        # Bound to the real database in _ISINCache.initialise().
        database = isin_db_proxy
        # _ISINCache.initialise() switches this off and retries if table
        # creation fails with an error mentioning 'WITHOUT'.
        without_rowid = True
class _ISINCache:
def __init__(self):
self.initialised = -1
self.db = None
self.dummy = False
def get_db(self):
if self.db is not None:
return self.db
try:
self.db = _ISINDBManager.get_database()
except _ISINCacheException as err:
get_yf_logger().info(f"Failed to create ISINCache, reason: {err}. "
"ISINCache will not be used. "
"Tip: You can direct cache to use a different location with 'set_isin_cache_location(mylocation)'")
self.dummy = True
return None
return self.db
def initialise(self):
if self.initialised != -1:
return
db = self.get_db()
if db is None:
self.initialised = 0 # failure
return
db.connect()
isin_db_proxy.initialize(db)
try:
db.create_tables([_ISIN_KV])
except _peewee.OperationalError as e:
if 'WITHOUT' in str(e):
_ISIN_KV._meta.without_rowid = False
db.create_tables([_ISIN_KV])
else:
raise
self.initialised = 1 # success
def lookup(self, key):
if self.dummy:
return None
if self.initialised == -1:
self.initialise()
if self.initialised == 0: # failure
return None
try:
return _ISIN_KV.get(_ISIN_KV.key == key).value
except _ISIN_KV.DoesNotExist:
return None
def store(self, key, value):
if self.dummy:
return
if self.initialised == -1:
self.initialise()
if self.initialised == 0: # failure
return
db = self.get_db()
if db is None:
return
try:
if value is None:
q = _ISIN_KV.delete().where(_ISIN_KV.key == key)
q.execute()
return
# Remove existing rows with same value that are older than 1 week
one_week_ago = _dt.datetime.now() - _dt.timedelta(weeks=1)
old_rows_query = _ISIN_KV.delete().where(
(_ISIN_KV.value == value) &
(_ISIN_KV.created_at < one_week_ago)
)
old_rows_query.execute()
with db.atomic():
_ISIN_KV.insert(key=key, value=value).execute()
except _peewee.IntegrityError:
# Integrity error means the key already exists. Try updating the key.
old_value = self.lookup(key)
if old_value != value:
get_yf_logger().debug(f"Value for key {key} changed from {old_value} to {value}.")
with db.atomic():
q = _ISIN_KV.update(value=value, created_at=_dt.datetime.now()).where(_ISIN_KV.key == key)
q.execute()
def get_isin_cache():
    """Return the ISIN cache handed out by ``_ISINCacheManager``."""
    cache = _ISINCacheManager.get_isin_cache()
    return cache
# --------------
# Utils
# --------------
def set_cache_location(cache_dir: str):
    """
    Choose the folder in which the yfinance cache databases are kept.

    Handy when the default cache folder is not writable.
    Must be called before cache is used (that is, before fetching tickers).

    :param cache_dir: Path to use for caches
    :return: None
    """
    # The timezone, cookie and ISIN databases are all redirected to the same folder.
    for manager in (_TzDBManager, _CookieDBManager, _ISINDBManager):
        manager.set_location(cache_dir)
def set_tz_cache_location(cache_dir: str):
    """Alias of :func:`set_cache_location`; relocates every cache, not only the timezone one.

    :param cache_dir: Path to use for caches
    :return: None
    """
    return set_cache_location(cache_dir)
@@ -0,0 +1,547 @@
from __future__ import annotations # Just in case
import json
from typing import Any, Optional, List, Union, Dict
import warnings
import numpy as np
from requests import Session, Response, exceptions
import pandas as pd
from datetime import datetime, date, timedelta
from .const import _QUERY1_URL_
from .utils import log_indent_decorator, get_yf_logger, _parse_user_dt
from .screener import screen
from .data import YfData
from .exceptions import YFException
class CalendarQuery:
    """
    Minimal query node for calendar requests, similar to yf.screener.query.QueryBase.

    A simple node serialises to:

        `{ "operator": operator, "operands": [field, ...values] }`

    A nested node serialises to:

        `{ "operator": operator, "operands": [ ...CalendarQuery ] }`

    ### Simple example:

    ```python
    op = CalendarQuery('eq', ['ticker', 'AAPL'])
    print(op.to_dict())
    ```
    """

    def __init__(self, operator: str, operand: Union[List[Any], List["CalendarQuery"]]):
        """
        :param operator: Operator string, e.g., 'eq', 'gte', 'and', 'or'; stored upper-cased.
        :param operand: List of operands: plain values (str, int) or nested CalendarQuery
            instances. The list is kept by reference, not copied.
        """
        self.operator = operator.upper()
        self.operands = operand

    def append(self, operand: Any) -> None:
        """
        Add one more operand to the end of the operands list.

        :param operand: a plain value or a CalendarQuery instance.
        """
        self.operands.append(operand)

    @property
    def is_empty(self) -> bool:
        """
        :return: True when the node has no operands, False otherwise.
        """
        return not len(self.operands)

    def to_dict(self) -> dict:
        """
        Build the query-ready dict for YF, serialising nested CalendarQuery operands recursively.

        :return: `{ "operator": ..., "operands": [...] }`
        """
        serialised = []
        for item in self.operands:
            if isinstance(item, CalendarQuery):
                item = item.to_dict()
            serialised.append(item)
        return {"operator": self.operator, "operands": serialised}
# Endpoint that Calendars._get_data() POSTs calendar queries to.
_CALENDAR_URL_ = f"{_QUERY1_URL_}/v1/finance/visualization"
# strftime/strptime format used for every start/end date handled by Calendars.
DATE_STR_FORMAT = "%Y-%m-%d"
# Per-calendar settings, keyed by the calendar type sent as "entityIdType".
#   sortField / includeFields: copied into the request body.
#   nan_cols:      columns cast to float64 with 0.0 replaced by NaN
#                  (looked up by their labels before renaming).
#   df_index:      column that becomes the DataFrame index.
#   renames:       column renames applied after the index is set.
#   datetime_cols: columns parsed with pd.to_datetime (looked up after renaming).
PREDEFINED_CALENDARS = {
    "sp_earnings": {
        "sortField": "intradaymarketcap",
        "includeFields": [
            "ticker",
            "companyshortname",
            "intradaymarketcap",
            "eventname",
            "startdatetime",
            "startdatetimetype",
            "epsestimate",
            "epsactual",
            "epssurprisepct",
        ],
        "nan_cols": ["Surprise (%)", "EPS Estimate", "Reported EPS"],
        "datetime_cols": ["Event Start Date"],
        "df_index": "Symbol",
        "renames": {
            "Surprise (%)": "Surprise(%)",
            "Company Name": "Company",
            "Market Cap (Intraday)": "Marketcap",
        },
    },
    "ipo_info": {
        "sortField": "startdatetime",
        "includeFields": [
            "ticker",
            "companyshortname",
            "exchange_short_name",
            "filingdate",
            "startdatetime",
            "amendeddate",
            "pricefrom",
            "priceto",
            "offerprice",
            "currencyname",
            "shares",
            "dealtype",
        ],
        "nan_cols": ["Price From", "Price To", "Price", "Shares"],
        "datetime_cols": ["Filing Date", "Date", "Amended Date"],
        "df_index": "Symbol",
        "renames": {
            "Exchange Short Name": "Exchange",
        },
    },
    "economic_event": {
        "sortField": "startdatetime",
        "includeFields": [
            "econ_release",
            "country_code",
            "startdatetime",
            "period",
            "after_release_actual",
            "consensus_estimate",
            "prior_release_actual",
            "originally_reported_actual",
        ],
        "nan_cols": ["Actual", "Market Expectation", "Prior to This", "Revised from"],
        "datetime_cols": ["Event Time"],
        "df_index": "Event",
        "renames": {
            "Country Code": "Region",
            "Market Expectation": "Expected",
            "Prior to This": "Last",
            "Revised from": "Revised",
        },
    },
    "splits": {
        "sortField": "startdatetime",
        "includeFields": [
            "ticker",
            "companyshortname",
            "startdatetime",
            "optionable",
            "old_share_worth",
            "share_worth",
        ],
        "nan_cols": [],
        "datetime_cols": ["Payable On"],
        "df_index": "Symbol",
        "renames": {
            "Optionable?": "Optionable",
        },
    },
}
class Calendars:
    """
    Get economic calendars, for example, Earnings, IPO, Economic Events, Splits

    ### Simple example default params:

    ```python
    import yfinance as yf
    calendars = yf.Calendars()
    earnings_calendar = calendars.get_earnings_calendar(limit=50)
    print(earnings_calendar)
    ```
    """

    def __init__(
        self,
        start: Optional[Union[str, datetime, date]] = None,
        end: Optional[Union[str, datetime, date]] = None,
        session: Optional[Session] = None,
    ):
        """
        :param str | datetime | date start: start date (default today),
            eg. start="2025-11-08"
        :param str | datetime | date end: end date (default `start + 7 days`),
            eg. end="2025-11-08"
        :param session: requests.Session object, optional
        """
        self._logger = get_yf_logger()
        self.session = session or Session()
        self._data: YfData = YfData(session=session)

        _start = self._parse_date_param(start)
        _end = self._parse_date_param(end)
        self._start = _start or datetime.now().strftime(DATE_STR_FORMAT)
        self._end = _end or (datetime.strptime(self._start, DATE_STR_FORMAT) + timedelta(days=7)).strftime(DATE_STR_FORMAT)

        if not start and end:
            self._logger.debug(f"Incomplete boundary: did not provide `start`, using today {self._start=} to {self._end=}")
        elif start and not end:
            self._logger.debug(f"Incomplete boundary: did not provide `end`, using {self._start=} to {self._end=}: +7 days from self._start")

        # OR-query of "most active" tickers, filled lazily by _get_most_active_operands().
        self._most_active_qy: CalendarQuery = CalendarQuery("or", [])
        # Request body that produced the DataFrame held in self.calendars, per calendar type.
        self._cache_request_body = {}
        # Last processed DataFrame per calendar type.
        self.calendars: Dict[str, pd.DataFrame] = {}

    def _parse_date_param(self, _date: Optional[Union[str, datetime, date, int]]) -> str:
        """Format a user-supplied date as DATE_STR_FORMAT; falsy input gives ""."""
        if not _date:
            return ""
        return _parse_user_dt(_date).strftime(DATE_STR_FORMAT)

    def _resolve_date_range(self, start, end, stacklevel: int = 3) -> tuple:
        """
        Turn optional per-call `start`/`end` into date strings, falling back to the
        values fixed at construction, and warn when only one of the two is given.

        :param stacklevel: forwarded to warnings.warn; the default of 3 attributes
            the warning to the caller of the method that invoked this helper.
        :return: (start, end) as DATE_STR_FORMAT strings
        """
        _start = self._parse_date_param(start)
        _end = self._parse_date_param(end)

        if (start and not end) or (end and not start):
            warnings.warn(
                "When prividing custom `start` and `end` parameters, you may want to specify both, to avoid unexpected behaviour.",
                UserWarning,
                stacklevel=stacklevel,
            )

        return _start or self._start, _end or self._end

    def _get_data(
        self, calendar_type: str, query: CalendarQuery, limit=12, offset=0, force=False
    ) -> pd.DataFrame:
        """
        Fetch one calendar from YF, or return the cached DataFrame when the request
        body is identical to the one that produced it and `force` is False.

        :raises YFException: for an unknown calendar type or an error reported by YF
        :return: processed DataFrame; an empty DataFrame if the response is not JSON
        """
        if calendar_type not in PREDEFINED_CALENDARS:
            raise YFException(f"Unknown calendar type: {calendar_type}")

        params = {"lang": "en-US", "region": "US"}
        body = {
            "sortType": "DESC",
            "entityIdType": calendar_type,
            "sortField": PREDEFINED_CALENDARS[calendar_type]["sortField"],
            "includeFields": PREDEFINED_CALENDARS[calendar_type]["includeFields"],
            "size": min(limit, 100),  # YF caps at 100, don't go higher
            "offset": offset,
            "query": query.to_dict(),
        }

        if not force:
            cache_body = self._cache_request_body.get(calendar_type)
            if cache_body == body and calendar_type in self.calendars:
                # Uses cache if force=False and new request has same body as previous
                self._logger.debug(f"Getting {calendar_type=} from local cache")
                return self.calendars[calendar_type]

        self._logger.debug(f"Fetching {calendar_type=} with {limit=}")
        response: Response = self._data.post(_CALENDAR_URL_, params=params, body=body)

        try:
            json_data = response.json()
        except json.JSONDecodeError:
            self._logger.error(f"{calendar_type}: Failed to retrieve calendar.")
            # Nothing usable came back: hand out an empty frame and leave the
            # cache as it was, so the next call tries again.
            return pd.DataFrame()

        # Error returned ("finance" may be missing or null)
        error = (json_data.get("finance") or {}).get("error")
        if error:
            raise YFException(error)

        # The body is recorded only once its DataFrame is fully processed;
        # recording it before the request let a failed fetch make the next
        # identical call return the previous query's data.
        self._cache_request_body.pop(calendar_type, None)
        self.calendars[calendar_type] = self._create_df(json_data)
        df = self._cleanup_df(calendar_type)
        self._cache_request_body[calendar_type] = body
        return df

    def _create_df(self, json_data: dict) -> pd.DataFrame:
        """
        Build a DataFrame from the first document of the first result in the payload.

        :return: DataFrame with one column per returned label; an empty DataFrame
            when the payload carries no document
        """
        try:
            document = json_data["finance"]["result"][0]["documents"][0]
            column_specs = document["columns"]
            rows = document["rows"]
        except (KeyError, IndexError, TypeError):
            return pd.DataFrame()

        columns = []
        for col in column_specs:
            label = col["label"]
            if label == "Event Start Date" and col["type"] == "STRING":
                # Rename duplicate columns Event Start Date
                label = "Timing"
            columns.append(label)

        return pd.DataFrame(rows, columns=columns)

    def _cleanup_df(self, calendar_type: str) -> pd.DataFrame:
        """
        Apply the PREDEFINED_CALENDARS settings to self.calendars[calendar_type] in place
        (numeric casts, index, renames, datetime parsing) and return it.
        """
        predef_cal: dict = PREDEFINED_CALENDARS[calendar_type]
        df: pd.DataFrame = self.calendars[calendar_type]
        if df.empty:
            return df

        # Convert types
        nan_cols: list = predef_cal["nan_cols"]
        if nan_cols:
            df[nan_cols] = df[nan_cols].astype("float64").replace(0.0, np.nan)

        # Format the dataframe
        df.set_index(predef_cal["df_index"], inplace=True)
        # No rename target in PREDEFINED_CALENDARS is also a rename source, so
        # one call gives the same result as renaming column by column.
        df.rename(columns=predef_cal["renames"], inplace=True)

        for datetime_col in predef_cal["datetime_cols"]:
            df[datetime_col] = pd.to_datetime(df[datetime_col])

        return df

    @log_indent_decorator
    def _get_most_active_operands(
        self, _market_cap: Optional[float], force=False
    ) -> CalendarQuery:
        """
        Retrieve tickers from YF, converts them into operands accepted by YF.
        Saves the operands in self._most_active_qy.
        Will not re-query if already populated.

        Used for earnings calendar optional filter.

        :param _market_cap: tickers with a lower market cap are left out; None keeps all
        :param force: if True, will re-query even if operands already exist
        :return: OR-query with one `eq ticker` operand per actively traded stock
        """
        if not self._most_active_qy.is_empty and not force:
            return self._most_active_qy

        self._logger.debug("Fetching 200 most_active for earnings calendar")

        try:
            json_raw: dict = screen(query="MOST_ACTIVES", count=200)
        except exceptions.HTTPError:
            self._logger.error("Failed to retrieve most active stocks.")
            return self._most_active_qy

        raw = json_raw.get("quotes", [{}])

        self._most_active_qy = CalendarQuery("or", [])
        for stock in raw:
            if not isinstance(stock, dict):
                continue

            ticker = stock.get("symbol", "")
            # A missing or null marketCap counts as 0 so the comparison below
            # cannot raise TypeError.
            t_market_cap = stock.get("marketCap") or 0
            # We filter market_cap here because we want to keep self._most_active_qy consistent
            if ticker and (_market_cap is None or t_market_cap >= _market_cap):
                self._most_active_qy.append(CalendarQuery("eq", ["ticker", ticker]))

        return self._most_active_qy

    def _get_startdatetime_operators(self, start=None, end=None) -> CalendarQuery:
        """
        Get startdatetime operands for start/end dates.
        If no dates passed, defaults to internal date set on initialization.
        """
        range_start, range_end = self._resolve_date_range(start, end)

        return CalendarQuery(
            "and",
            [
                CalendarQuery("gte", ["startdatetime", range_start]),
                CalendarQuery("lte", ["startdatetime", range_end]),
            ],
        )

    ### Manual getter functions:

    @log_indent_decorator
    def get_earnings_calendar(
        self,
        market_cap: Optional[float] = None,
        filter_most_active: bool = True,
        start=None,
        end=None,
        limit=12,
        offset=0,
        force=False,
    ) -> pd.DataFrame:
        """
        Retrieve earnings calendar from YF as a DataFrame.
        An identical repeated request is answered from the cache unless `force` is True.

        :param market_cap: market cap cutoff in USD, default None
        :param filter_most_active: will filter for actively traded stocks (default True);
            ignored when `offset` is non-zero
        :param str | datetime | date start: overwrite start date (default set by __init__),
            eg. start="2025-11-08"
        :param str | datetime | date end: overwrite end date (default set by __init__),
            eg. end="2025-11-08"
        :param limit: maximum number of results to return (YF caps at 100)
        :param offset: offsets the results for pagination. YF default 0
        :param force: if True, will re-query even if cache already exists
        :return: DataFrame with earnings calendar
        """
        range_start, range_end = self._resolve_date_range(start, end)

        query = CalendarQuery(
            "and",
            [
                CalendarQuery("eq", ["region", "us"]),
                CalendarQuery(
                    "or",
                    [
                        CalendarQuery("eq", ["eventtype", "EAD"]),
                        CalendarQuery("eq", ["eventtype", "ERA"]),
                    ],
                ),
                CalendarQuery("gte", ["startdatetime", range_start]),
                CalendarQuery("lte", ["startdatetime", range_end]),
            ],
        )

        if market_cap is not None:
            if market_cap < 10_000_000:
                warnings.warn(
                    f"market_cap {market_cap} is very low, did you mean to set it higher?",
                    UserWarning,
                    stacklevel=2,
                )
            query.append(CalendarQuery("gte", ["intradaymarketcap", market_cap]))

        if filter_most_active and not offset:
            # YF does not like filter most active while offsetting
            query.append(self._get_most_active_operands(market_cap))

        return self._get_data(
            calendar_type="sp_earnings",
            query=query,
            limit=limit,
            offset=offset,
            force=force,
        )

    @log_indent_decorator
    def get_ipo_info_calendar(
        self, start=None, end=None, limit=12, offset=0, force=False
    ) -> pd.DataFrame:
        """
        Retrieve IPOs calendar from YF as a Dataframe.

        :param str | datetime | date start: overwrite start date (default set by __init__),
            eg. start="2025-11-08"
        :param str | datetime | date end: overwrite end date (default set by __init__),
            eg. end="2025-11-08"
        :param limit: maximum number of results to return (YF caps at 100)
        :param offset: offsets the results for pagination. YF default 0
        :param force: if True, will re-query even if cache already exists
        :return: DataFrame with IPOs calendar
        """
        range_start, range_end = self._resolve_date_range(start, end)

        # An IPO matches when any of its three dates falls inside the range.
        query = CalendarQuery(
            "or",
            [
                CalendarQuery("gtelt", ["startdatetime", range_start, range_end]),
                CalendarQuery("gtelt", ["filingdate", range_start, range_end]),
                CalendarQuery("gtelt", ["amendeddate", range_start, range_end]),
            ],
        )

        return self._get_data(
            calendar_type="ipo_info",
            query=query,
            limit=limit,
            offset=offset,
            force=force,
        )

    @log_indent_decorator
    def get_economic_events_calendar(
        self, start=None, end=None, limit=12, offset=0, force=False
    ) -> pd.DataFrame:
        """
        Retrieve Economic Events calendar from YF as a DataFrame.

        :param str | datetime | date start: overwrite start date (default set by __init__),
            eg. start="2025-11-08"
        :param str | datetime | date end: overwrite end date (default set by __init__),
            eg. end="2025-11-08"
        :param limit: maximum number of results to return (YF caps at 100)
        :param offset: offsets the results for pagination. YF default 0
        :param force: if True, will re-query even if cache already exists
        :return: DataFrame with Economic Events calendar
        """
        return self._get_data(
            calendar_type="economic_event",
            query=self._get_startdatetime_operators(start, end),
            limit=limit,
            offset=offset,
            force=force,
        )

    @log_indent_decorator
    def get_splits_calendar(
        self, start=None, end=None, limit=12, offset=0, force=False
    ) -> pd.DataFrame:
        """
        Retrieve Splits calendar from YF as a DataFrame.

        :param str | datetime | date start: overwrite start date (default set by __init__),
            eg. start="2025-11-08"
        :param str | datetime | date end: overwrite end date (default set by __init__),
            eg. end="2025-11-08"
        :param limit: maximum number of results to return (YF caps at 100)
        :param offset: offsets the results for pagination. YF default 0
        :param force: if True, will re-query even if cache already exists
        :return: DataFrame with Splits calendar
        """
        return self._get_data(
            calendar_type="splits",
            query=self._get_startdatetime_operators(start, end),
            limit=limit,
            offset=offset,
            force=force,
        )

    ### Easy / Default getter functions:

    @property
    def earnings_calendar(self) -> pd.DataFrame:
        """Earnings calendar with default settings."""
        if "sp_earnings" in self.calendars:
            return self.calendars["sp_earnings"]
        return self.get_earnings_calendar()

    @property
    def ipo_info_calendar(self) -> pd.DataFrame:
        """IPOs calendar with default settings."""
        if "ipo_info" in self.calendars:
            return self.calendars["ipo_info"]
        return self.get_ipo_info_calendar()

    @property
    def economic_events_calendar(self) -> pd.DataFrame:
        """Economic events calendar with default settings."""
        if "economic_event" in self.calendars:
            return self.calendars["economic_event"]
        return self.get_economic_events_calendar()

    @property
    def splits_calendar(self) -> pd.DataFrame:
        """Splits calendar with default settings."""
        if "splits" in self.calendars:
            return self.calendars["splits"]
        return self.get_splits_calendar()
@@ -0,0 +1,58 @@
import json
class NestedConfig:
    """Attribute-style view over one configuration section.

    Reading ``cfg.option`` returns ``data.get('option')`` (None when unset) and
    assigning ``cfg.option = value`` writes into ``data``. The instance itself
    only holds ``name`` and ``data``; ``data`` is shared with the caller, not
    copied.
    """

    def __init__(self, name, data):
        # Written through __dict__ because __setattr__ redirects into ``data``.
        self.__dict__['name'] = name
        self.__dict__['data'] = data

    def __getattr__(self, key):
        """Return the option *key*, or None if it is not set.

        :raises AttributeError: for dunder names, and on an instance that has
            no ``data`` yet (e.g. while being rebuilt by copy or pickle)
        """
        # Protocol probes such as __deepcopy__ / __setstate__ must see a missing
        # attribute, not an option whose value is None.
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        try:
            data = self.__dict__['data']
        except KeyError:
            # Going through self.data here would re-enter __getattr__ forever.
            raise AttributeError(key) from None
        return data.get(key)

    def __setattr__(self, key, value):
        self.data[key] = value

    def __len__(self):
        return len(self.__dict__['data'])

    def __repr__(self):
        return json.dumps(self.data, indent=4)
class ConfigMgr:
    """Lazily initialised configuration: ``mgr.<section>`` returns a NestedConfig view.

    The defaults are loaded on first use. Accessing a section that does not
    exist yet creates it empty.
    """

    def __init__(self):
        self._initialised = False

    def _load_option(self):
        """(Re)create ``self.options`` with the default values."""
        self.options = {
            'network': {'proxy': None, 'retries': 0},
            'debug': {'hide_exceptions': True, 'logging': False},
        }
        self._initialised = True

    def _ensure_loaded(self):
        """Load the defaults unless that already happened."""
        # Read the flag from __dict__: on an instance built without __init__
        # a plain attribute read would re-enter __getattr__ forever.
        if not self.__dict__.get('_initialised', False):
            self._load_option()

    def __getattr__(self, key):
        """Return the section *key* as a NestedConfig, creating it if missing.

        :raises AttributeError: for dunder names
        """
        # Protocol probes (copy, pickle, hasattr checks) must not create sections.
        if key.startswith('__') and key.endswith('__'):
            raise AttributeError(key)
        self._ensure_loaded()
        # Attributes that only exist once the defaults are loaded (``options``)
        # are returned as they are instead of becoming a section of that name.
        if key in self.__dict__:
            return self.__dict__[key]
        return NestedConfig(key, self.options.setdefault(key, {}))

    def __contains__(self, key):
        self._ensure_loaded()
        return key in self.options

    def __repr__(self):
        self._ensure_loaded()
        all_options = self.options.copy()
        return json.dumps(all_options, indent=4)
# Module-level ConfigMgr instance; the package __init__ imports it as `config`.
YfConfig = ConfigMgr()
@@ -0,0 +1,719 @@
# Yahoo Finance host URLs (scheme + host, no trailing slash).
# NOTE(review): which endpoints are built on each host is decided by callers outside this view.
_QUERY1_URL_ = 'https://query1.finance.yahoo.com'
_BASE_URL_ = 'https://query2.finance.yahoo.com'
_ROOT_URL_ = 'https://finance.yahoo.com'
# Unique marker object; presumably used to tell "argument not supplied" apart from None - confirm at call sites.
_SENTINEL_ = object()
fundamentals_keys = {
'financials': ["TaxEffectOfUnusualItems", "TaxRateForCalcs", "NormalizedEBITDA", "NormalizedDilutedEPS",
"NormalizedBasicEPS", "TotalUnusualItems", "TotalUnusualItemsExcludingGoodwill",
"NetIncomeFromContinuingOperationNetMinorityInterest", "ReconciledDepreciation",
"ReconciledCostOfRevenue", "EBITDA", "EBIT", "NetInterestIncome", "InterestExpense",
"InterestIncome", "ContinuingAndDiscontinuedDilutedEPS", "ContinuingAndDiscontinuedBasicEPS",
"NormalizedIncome", "NetIncomeFromContinuingAndDiscontinuedOperation", "TotalExpenses",
"RentExpenseSupplemental", "ReportedNormalizedDilutedEPS", "ReportedNormalizedBasicEPS",
"TotalOperatingIncomeAsReported", "DividendPerShare", "DilutedAverageShares", "BasicAverageShares",
"DilutedEPS", "DilutedEPSOtherGainsLosses", "TaxLossCarryforwardDilutedEPS",
"DilutedAccountingChange", "DilutedExtraordinary", "DilutedDiscontinuousOperations",
"DilutedContinuousOperations", "BasicEPS", "BasicEPSOtherGainsLosses", "TaxLossCarryforwardBasicEPS",
"BasicAccountingChange", "BasicExtraordinary", "BasicDiscontinuousOperations",
"BasicContinuousOperations", "DilutedNIAvailtoComStockholders", "AverageDilutionEarnings",
"NetIncomeCommonStockholders", "OtherunderPreferredStockDividend", "PreferredStockDividends",
"NetIncome", "MinorityInterests", "NetIncomeIncludingNoncontrollingInterests",
"NetIncomeFromTaxLossCarryforward", "NetIncomeExtraordinary", "NetIncomeDiscontinuousOperations",
"NetIncomeContinuousOperations", "EarningsFromEquityInterestNetOfTax", "TaxProvision",
"PretaxIncome", "OtherIncomeExpense", "OtherNonOperatingIncomeExpenses", "SpecialIncomeCharges",
"GainOnSaleOfPPE", "GainOnSaleOfBusiness", "OtherSpecialCharges", "WriteOff",
"ImpairmentOfCapitalAssets", "RestructuringAndMergernAcquisition", "SecuritiesAmortization",
"EarningsFromEquityInterest", "GainOnSaleOfSecurity", "NetNonOperatingInterestIncomeExpense",
"TotalOtherFinanceCost", "InterestExpenseNonOperating", "InterestIncomeNonOperating",
"OperatingIncome", "OperatingExpense", "OtherOperatingExpenses", "OtherTaxes",
"ProvisionForDoubtfulAccounts", "DepreciationAmortizationDepletionIncomeStatement",
"DepletionIncomeStatement", "DepreciationAndAmortizationInIncomeStatement", "Amortization",
"AmortizationOfIntangiblesIncomeStatement", "DepreciationIncomeStatement", "ResearchAndDevelopment",
"SellingGeneralAndAdministration", "SellingAndMarketingExpense", "GeneralAndAdministrativeExpense",
"OtherGandA", "InsuranceAndClaims", "RentAndLandingFees", "SalariesAndWages", "GrossProfit",
"CostOfRevenue", "TotalRevenue", "ExciseTaxes", "OperatingRevenue", "LossAdjustmentExpense",
"NetPolicyholderBenefitsAndClaims", "PolicyholderBenefitsGross", "PolicyholderBenefitsCeded",
"OccupancyAndEquipment", "ProfessionalExpenseAndContractServicesExpense", "OtherNonInterestExpense"],
'balance-sheet': ["TreasurySharesNumber", "PreferredSharesNumber", "OrdinarySharesNumber", "ShareIssued", "NetDebt",
"TotalDebt", "TangibleBookValue", "InvestedCapital", "WorkingCapital", "NetTangibleAssets",
"CapitalLeaseObligations", "CommonStockEquity", "PreferredStockEquity", "TotalCapitalization",
"TotalEquityGrossMinorityInterest", "MinorityInterest", "StockholdersEquity",
"OtherEquityInterest", "GainsLossesNotAffectingRetainedEarnings", "OtherEquityAdjustments",
"FixedAssetsRevaluationReserve", "ForeignCurrencyTranslationAdjustments",
"MinimumPensionLiabilities", "UnrealizedGainLoss", "TreasuryStock", "RetainedEarnings",
"AdditionalPaidInCapital", "CapitalStock", "OtherCapitalStock", "CommonStock", "PreferredStock",
"TotalPartnershipCapital", "GeneralPartnershipCapital", "LimitedPartnershipCapital",
"TotalLiabilitiesNetMinorityInterest", "TotalNonCurrentLiabilitiesNetMinorityInterest",
"OtherNonCurrentLiabilities", "LiabilitiesHeldforSaleNonCurrent", "RestrictedCommonStock",
"PreferredSecuritiesOutsideStockEquity", "DerivativeProductLiabilities", "EmployeeBenefits",
"NonCurrentPensionAndOtherPostretirementBenefitPlans", "NonCurrentAccruedExpenses",
"DuetoRelatedPartiesNonCurrent", "TradeandOtherPayablesNonCurrent",
"NonCurrentDeferredLiabilities", "NonCurrentDeferredRevenue",
"NonCurrentDeferredTaxesLiabilities", "LongTermDebtAndCapitalLeaseObligation",
"LongTermCapitalLeaseObligation", "LongTermDebt", "LongTermProvisions", "CurrentLiabilities",
"OtherCurrentLiabilities", "CurrentDeferredLiabilities", "CurrentDeferredRevenue",
"CurrentDeferredTaxesLiabilities", "CurrentDebtAndCapitalLeaseObligation",
"CurrentCapitalLeaseObligation", "CurrentDebt", "OtherCurrentBorrowings", "LineOfCredit",
"CommercialPaper", "CurrentNotesPayable", "PensionandOtherPostRetirementBenefitPlansCurrent",
"CurrentProvisions", "PayablesAndAccruedExpenses", "CurrentAccruedExpenses", "InterestPayable",
"Payables", "OtherPayable", "DuetoRelatedPartiesCurrent", "DividendsPayable", "TotalTaxPayable",
"IncomeTaxPayable", "AccountsPayable", "TotalAssets", "TotalNonCurrentAssets",
"OtherNonCurrentAssets", "DefinedPensionBenefit", "NonCurrentPrepaidAssets",
"NonCurrentDeferredAssets", "NonCurrentDeferredTaxesAssets", "DuefromRelatedPartiesNonCurrent",
"NonCurrentNoteReceivables", "NonCurrentAccountsReceivable", "FinancialAssets",
"InvestmentsAndAdvances", "OtherInvestments", "InvestmentinFinancialAssets",
"HeldToMaturitySecurities", "AvailableForSaleSecurities",
"FinancialAssetsDesignatedasFairValueThroughProfitorLossTotal", "TradingSecurities",
"LongTermEquityInvestment", "InvestmentsinJointVenturesatCost",
"InvestmentsInOtherVenturesUnderEquityMethod", "InvestmentsinAssociatesatCost",
"InvestmentsinSubsidiariesatCost", "InvestmentProperties", "GoodwillAndOtherIntangibleAssets",
"OtherIntangibleAssets", "Goodwill", "NetPPE", "AccumulatedDepreciation", "GrossPPE", "Leases",
"ConstructionInProgress", "OtherProperties", "MachineryFurnitureEquipment",
"BuildingsAndImprovements", "LandAndImprovements", "Properties", "CurrentAssets",
"OtherCurrentAssets", "HedgingAssetsCurrent", "AssetsHeldForSaleCurrent", "CurrentDeferredAssets",
"CurrentDeferredTaxesAssets", "RestrictedCash", "PrepaidAssets", "Inventory",
"InventoriesAdjustmentsAllowances", "OtherInventories", "FinishedGoods", "WorkInProcess",
"RawMaterials", "Receivables", "ReceivablesAdjustmentsAllowances", "OtherReceivables",
"DuefromRelatedPartiesCurrent", "TaxesReceivable", "AccruedInterestReceivable", "NotesReceivable",
"LoansReceivable", "AccountsReceivable", "AllowanceForDoubtfulAccountsReceivable",
"GrossAccountsReceivable", "CashCashEquivalentsAndShortTermInvestments",
"OtherShortTermInvestments", "CashAndCashEquivalents", "CashEquivalents", "CashFinancial",
"CashCashEquivalentsAndFederalFundsSold"],
'cash-flow': ["ForeignSales", "DomesticSales", "AdjustedGeographySegmentData", "FreeCashFlow",
"RepurchaseOfCapitalStock", "RepaymentOfDebt", "IssuanceOfDebt", "IssuanceOfCapitalStock",
"CapitalExpenditure", "InterestPaidSupplementalData", "IncomeTaxPaidSupplementalData",
"EndCashPosition", "OtherCashAdjustmentOutsideChangeinCash", "BeginningCashPosition",
"EffectOfExchangeRateChanges", "ChangesInCash", "OtherCashAdjustmentInsideChangeinCash",
"CashFlowFromDiscontinuedOperation", "FinancingCashFlow", "CashFromDiscontinuedFinancingActivities",
"CashFlowFromContinuingFinancingActivities", "NetOtherFinancingCharges", "InterestPaidCFF",
"ProceedsFromStockOptionExercised", "CashDividendsPaid", "PreferredStockDividendPaid",
"CommonStockDividendPaid", "NetPreferredStockIssuance", "PreferredStockPayments",
"PreferredStockIssuance", "NetCommonStockIssuance", "CommonStockPayments", "CommonStockIssuance",
"NetIssuancePaymentsOfDebt", "NetShortTermDebtIssuance", "ShortTermDebtPayments",
"ShortTermDebtIssuance", "NetLongTermDebtIssuance", "LongTermDebtPayments", "LongTermDebtIssuance",
"InvestingCashFlow", "CashFromDiscontinuedInvestingActivities",
"CashFlowFromContinuingInvestingActivities", "NetOtherInvestingChanges", "InterestReceivedCFI",
"DividendsReceivedCFI", "NetInvestmentPurchaseAndSale", "SaleOfInvestment", "PurchaseOfInvestment",
"NetInvestmentPropertiesPurchaseAndSale", "SaleOfInvestmentProperties",
"PurchaseOfInvestmentProperties", "NetBusinessPurchaseAndSale", "SaleOfBusiness",
"PurchaseOfBusiness", "NetIntangiblesPurchaseAndSale", "SaleOfIntangibles", "PurchaseOfIntangibles",
"NetPPEPurchaseAndSale", "SaleOfPPE", "PurchaseOfPPE", "CapitalExpenditureReported",
"OperatingCashFlow", "CashFromDiscontinuedOperatingActivities",
"CashFlowFromContinuingOperatingActivities", "TaxesRefundPaid", "InterestReceivedCFO",
"InterestPaidCFO", "DividendReceivedCFO", "DividendPaidCFO", "ChangeInWorkingCapital",
"ChangeInOtherWorkingCapital", "ChangeInOtherCurrentLiabilities", "ChangeInOtherCurrentAssets",
"ChangeInPayablesAndAccruedExpense", "ChangeInAccruedExpense", "ChangeInInterestPayable",
"ChangeInPayable", "ChangeInDividendPayable", "ChangeInAccountPayable", "ChangeInTaxPayable",
"ChangeInIncomeTaxPayable", "ChangeInPrepaidAssets", "ChangeInInventory", "ChangeInReceivables",
"ChangesInAccountReceivables", "OtherNonCashItems", "ExcessTaxBenefitFromStockBasedCompensation",
"StockBasedCompensation", "UnrealizedGainLossOnInvestmentSecurities", "ProvisionandWriteOffofAssets",
"AssetImpairmentCharge", "AmortizationOfSecurities", "DeferredTax", "DeferredIncomeTax",
"DepreciationAmortizationDepletion", "Depletion", "DepreciationAndAmortization",
"AmortizationCashFlow", "AmortizationOfIntangibles", "Depreciation", "OperatingGainsLosses",
"PensionAndEmployeeBenefitExpense", "EarningsLossesFromEquityInvestments",
"GainLossOnInvestmentSecurities", "NetForeignCurrencyExchangeGainLoss", "GainLossOnSaleOfPPE",
"GainLossOnSaleOfBusiness", "NetIncomeFromContinuingOperations",
"CashFlowsfromusedinOperatingActivitiesDirect", "TaxesRefundPaidDirect", "InterestReceivedDirect",
"InterestPaidDirect", "DividendsReceivedDirect", "DividendsPaidDirect", "ClassesofCashPayments",
"OtherCashPaymentsfromOperatingActivities", "PaymentsonBehalfofEmployees",
"PaymentstoSuppliersforGoodsandServices", "ClassesofCashReceiptsfromOperatingActivities",
"OtherCashReceiptsfromOperatingActivities", "ReceiptsfromGovernmentGrants", "ReceiptsfromCustomers"]}
_PRICE_COLNAMES_ = ['Open', 'High', 'Low', 'Close', 'Adj Close']
quote_summary_valid_modules = (
"summaryProfile", # contains general information about the company
"summaryDetail", # prices + volume + market cap + etc
"assetProfile", # summaryProfile + company officers
"fundProfile",
"price", # current prices
"quoteType", # quoteType
"esgScores", # Environmental, social, and governance (ESG) scores, sustainability and ethical performance of companies
"incomeStatementHistory",
"incomeStatementHistoryQuarterly",
"balanceSheetHistory",
"balanceSheetHistoryQuarterly",
"cashFlowStatementHistory",
"cashFlowStatementHistoryQuarterly",
"defaultKeyStatistics", # KPIs (PE, enterprise value, EPS, EBITA, and more)
"financialData", # Financial KPIs (revenue, gross margins, operating cash flow, free cash flow, and more)
"calendarEvents", # future earnings date
"secFilings", # SEC filings, such as 10K and 10Q reports
"upgradeDowngradeHistory", # upgrades and downgrades that analysts have given a company's stock
"institutionOwnership", # institutional ownership, holders and shares outstanding
"fundOwnership", # mutual fund ownership, holders and shares outstanding
"majorDirectHolders",
"majorHoldersBreakdown",
"insiderTransactions", # insider transactions, such as the number of shares bought and sold by company executives
"insiderHolders", # insider holders, such as the number of shares held by company executives
"netSharePurchaseActivity", # net share purchase activity, such as the number of shares bought and sold by company executives
"earnings", # earnings history
"earningsHistory",
"earningsTrend", # earnings trend
"industryTrend",
"indexTrend",
"sectorTrend",
"recommendationTrend",
"futuresChain",
)
# Sector name -> set of industry names; map last updated 2025-12-19.
SECTOR_INDUSTY_MAPPING = {
'Basic Materials': {'Specialty Chemicals',
'Gold',
'Building Materials',
'Copper',
'Steel',
'Agricultural Inputs',
'Chemicals',
'Other Industrial Metals & Mining',
'Lumber & Wood Production',
'Aluminum',
'Other Precious Metals & Mining',
'Coking Coal',
'Paper & Paper Products',
'Silver'},
'Communication Services': {'Advertising Agencies',
'Broadcasting',
'Electronic Gaming & Multimedia',
'Entertainment',
'Internet Content & Information',
'Publishing',
'Telecom Services'},
'Consumer Cyclical': {'Apparel Manufacturing',
'Apparel Retail',
'Auto & Truck Dealerships',
'Auto Manufacturers',
'Auto Parts',
'Department Stores',
'Footwear & Accessories',
'Furnishings, Fixtures & Appliances',
'Gambling',
'Home Improvement Retail',
'Internet Retail',
'Leisure',
'Lodging',
'Luxury Goods',
'Packaging & Containers',
'Personal Services',
'Recreational Vehicles',
'Residential Construction',
'Resorts & Casinos',
'Restaurants',
'Specialty Retail',
'Textile Manufacturing',
'Travel Services'},
'Consumer Defensive': {'Beverages—Brewers',
'Beverages—Non-Alcoholic',
'Beverages—Wineries & Distilleries',
'Confectioners',
'Discount Stores',
'Education & Training Services',
'Farm Products',
'Food Distribution',
'Grocery Stores',
'Household & Personal Products',
'Packaged Foods',
'Tobacco'},
'Energy': {'Oil & Gas Drilling',
'Oil & Gas E&P',
'Oil & Gas Equipment & Services',
'Oil & Gas Integrated',
'Oil & Gas Midstream',
'Oil & Gas Refining & Marketing',
'Thermal Coal',
'Uranium'},
'Financial Services': {'Asset Management',
'Banks—Diversified',
'Banks—Regional',
'Capital Markets',
'Credit Services',
'Financial Conglomerates',
'Financial Data & Stock Exchanges',
'Insurance Brokers',
'Insurance—Diversified',
'Insurance—Life',
'Insurance—Property & Casualty',
'Insurance—Reinsurance',
'Insurance—Specialty',
'Mortgage Finance',
'Shell Companies'},
'Healthcare': {'Biotechnology',
'Diagnostics & Research',
'Drug Manufacturers—General',
'Drug Manufacturers—Specialty & Generic',
'Health Information Services',
'Healthcare Plans',
'Medical Care Facilities',
'Medical Devices',
'Medical Instruments & Supplies',
'Medical Distribution',
'Pharmaceutical Retailers'},
'Industrials': {'Aerospace & Defense',
'Airlines',
'Airports & Air Services',
'Building Products & Equipment',
'Business Equipment & Supplies',
'Conglomerates',
'Consulting Services',
'Electrical Equipment & Parts',
'Engineering & Construction',
'Farm & Heavy Construction Machinery',
'Industrial Distribution',
'Infrastructure Operations',
'Integrated Freight & Logistics',
'Marine Shipping',
'Metal Fabrication',
'Pollution & Treatment Controls',
'Railroads',
'Rental & Leasing Services',
'Security & Protection Services',
'Specialty Business Services',
'Specialty Industrial Machinery',
'Staffing & Employment Services',
'Tools & Accessories',
'Trucking',
'Waste Management'},
'Real Estate': {'Real Estate—Development',
'Real Estate Services',
'Real Estate—Diversified',
'REIT—Healthcare Facilities',
'REIT—Hotel & Motel',
'REIT—Industrial',
'REIT—Office',
'REIT—Residential',
'REIT—Retail',
'REIT—Mortgage',
'REIT—Specialty',
'REIT—Diversified'},
'Technology': {'Communication Equipment',
'Computer Hardware',
'Consumer Electronics',
'Electronic Components',
'Electronics & Computer Distribution',
'Information Technology Services',
'Scientific & Technical Instruments',
'Semiconductor Equipment & Materials',
'Semiconductors',
'Software—Application',
'Software—Infrastructure',
'Solar'},
'Utilities': {'Utilities—Diversified',
'Utilities—Independent Power Producers',
'Utilities—Regulated Electric',
'Utilities—Regulated Gas',
'Utilities—Regulated Water',
'Utilities—Renewable'},
}
# Slug version of SECTOR_INDUSTY_MAPPING: slugged sector -> list of slugged industries.
# Slug rule (same for sectors and industries): lower-case, drop '& ' and '- ',
# turn ', ' into a single space, then replace every space with '-'.
# The industries come from sets, so the order of each list follows set iteration order.
# NOTE(review): none of the replacements targets the em dash, so names such as
# 'Beverages—Brewers' keep that character in their slug - confirm this is intended.
SECTOR_INDUSTY_MAPPING_LC = {}
for k in SECTOR_INDUSTY_MAPPING.keys():
    k2 = k.lower().replace('& ', '').replace('- ', '').replace(', ', ' ').replace(' ', '-')
    SECTOR_INDUSTY_MAPPING_LC[k2] = []
    for v in SECTOR_INDUSTY_MAPPING[k]:
        v2 = v.lower().replace('& ', '').replace('- ', '').replace(', ', ' ').replace(' ', '-')
        SECTOR_INDUSTY_MAPPING_LC[k2].append(v2)
# _MIC_TO_YAHOO_SUFFIX maps Market Identifier Codes (MIC) to Yahoo Finance market suffixes.
# c.f. :
# https://help.yahoo.com/kb/finance-for-web/SLN2310.html;_ylt=AwrJKiCZFo9g3Y8AsDWPAwx.;_ylu=Y29sbwMEcG9zAzEEdnRpZAMEc2VjA3Ny?locale=en_US
# https://www.iso20022.org/market-identifier-codes
_MIC_TO_YAHOO_SUFFIX = {
'XCBT': 'CBT', 'XCME': 'CME', 'IFUS': 'NYB', 'CECS': 'CMX', 'XNYM': 'NYM', 'XNYS': '', 'XNAS': '', # United States
'XBUE': 'BA', # Argentina
'XVIE': 'VI', # Austria
'XASX': 'AX', 'XAUS': 'XA', # Australia
'XBRU': 'BR', # Belgium
'BVMF': 'SA', # Brazil
'CNSX': 'CN', 'NEOE': 'NE', 'XTSE': 'TO', 'XTSX': 'V', # Canada
'XSGO': 'SN', # Chile
'XSHG': 'SS', 'XSHE': 'SZ', # China
'XBOG': 'CL', # Colombia
'XPRA': 'PR', # Czech Republic
'XCSE': 'CO', # Denmark
'XCAI': 'CA', # Egypt
'XTAL': 'TL', # Estonia
'CEUX': 'XD', 'XEUR': 'NX', # Europe (Cboe Europe, Euronext)
'XHEL': 'HE', # Finland
'XPAR': 'PA', # France
'XBER': 'BE', 'XBMS': 'BM', 'XDUS': 'DU', 'XFRA': 'F', 'XHAM': 'HM', 'XHAN': 'HA', 'XMUN': 'MU', 'XSTU': 'SG', 'XETR': 'DE', # Germany
'XATH': 'AT', # Greece
'XHKG': 'HK', # Hong Kong
'XBUD': 'BD', # Hungary
'XICE': 'IC', # Iceland
'XBOM': 'BO', 'XNSE': 'NS', # India
'XIDX': 'JK', # Indonesia
'XDUB': 'IR', # Ireland
'XTAE': 'TA', # Israel
'MTAA': 'MI', 'EUTL': 'TI', # Italy
'XTKS': 'T', # Japan
'XKFE': 'KW', # Kuwait
'XRIS': 'RG', # Latvia
'XVIL': 'VS', # Lithuania
'XKLS': 'KL', # Malaysia
'XMEX': 'MX', # Mexico
'XAMS': 'AS', # Netherlands
'XNZE': 'NZ', # New Zealand
'XOSL': 'OL', # Norway
'XPHS': 'PS', # Philippines
'XWAR': 'WA', # Poland
'XLIS': 'LS', # Portugal
'XQAT': 'QA', # Qatar
'XBSE': 'RO', # Romania
'XSES': 'SI', # Singapore
'XJSE': 'JO', # South Africa
'XKRX': 'KS', 'KQKS': 'KQ', # South Korea
'BMEX': 'MC', # Spain
'XSAU': 'SR', # Saudi Arabia
'XSTO': 'ST', # Sweden
'XSWX': 'SW', # Switzerland
'ROCO': 'TWO', 'XTAI': 'TW', # Taiwan
'XBKK': 'BK', # Thailand
'XIST': 'IS', # Turkey
'XDFM': 'AE', # UAE
'AQXE': 'AQ', 'XCHI': 'XC', 'XLON': 'L', 'ILSE': 'IL', # United Kingdom
'XCAR': 'CR', # Venezuela
'XSTC': 'VN' # Vietnam
}
def merge_two_level_dicts(dict1, dict2):
    """Return a new dict that merges ``dict2`` into ``dict1``, two levels deep.

    Neither input is modified. For each top-level key of ``dict2``:

    * key missing from ``dict1``: the value from ``dict2`` is used;
    * both values are sets: the result is their union;
    * both values are dicts: the nested dicts are merged key by key. Nested
      sets are unioned, any other nested value from ``dict2`` replaces the
      one from ``dict1``, and nested keys that exist only in ``dict1`` are
      kept;
    * otherwise (the types differ): the value from ``dict1`` is kept.

    Args:
        dict1: Base mapping.
        dict2: Mapping whose content is merged on top of ``dict1``.

    Returns:
        The merged dict (a shallow copy of ``dict1`` with merged entries).
    """
    result = dict1.copy()
    for key, value in dict2.items():
        if key not in result:
            result[key] = value
            continue

        current = result[key]
        if isinstance(value, set) and isinstance(current, set):
            result[key] = current | value
        elif isinstance(value, dict) and isinstance(current, dict):
            # Start from dict1's nested dict so its own keys are not lost.
            merged = dict(current)
            for sub_key, sub_value in value.items():
                existing = merged.get(sub_key)
                if isinstance(sub_value, set) and isinstance(existing, set):
                    merged[sub_key] = existing | sub_value
                else:
                    merged[sub_key] = sub_value
            result[key] = merged
        # Mismatched types: keep dict1's value untouched.
    return result
EQUITY_SCREENER_EQ_MAP = {
"exchange": {
'ar': {'BUE'},
'at': {'VIE'},
'au': {'ASX'},
'be': {'BRU'},
'br': {'SAO'},
'ca': {'CNQ', 'NEO', 'TOR', 'VAN'},
'ch': {'EBS'},
'cl': {'SGO'},
'cn': {'SHH', 'SHZ'},
'co': {'BVC'},
'cz': {'PRA'},
'de': {'BER', 'DUS', 'FRA', 'HAM', 'GER', 'MUN', 'STU'},
'dk': {'CPH'},
'ee': {'TAL'},
'eg': {'CAI'},
'es': {'MCE'},
'fi': {'HEL'},
'fr': {'PAR'},
'gb': {'AQS', 'IOB', 'LSE'},
'gr': {'ATH'},
'hk': {'HKG'},
'hu': {'BUD'},
'id': {'JKT'},
'ie': {'ISE'},
'il': {'TLV'},
'in': {'BSE', 'NSI'},
'is': {'ICE'},
'it': {'MIL'},
'jp': {'FKA', 'JPX', 'SAP'},
'kr': {'KOE', 'KSC'},
'kw': {'KUW'},
'lk': {},
'lt': {'LIT'},
'lv': {'RIS'},
'mx': {'MEX'},
'my': {'KLS'},
'nl': {'AMS'},
'no': {'OSL'},
'nz': {'NZE'},
'pe': {},
'ph': {'PHP', 'PHS'},
'pk': {},
'pl': {'WSE'},
'pt': {'LIS'},
'qa': {'DOH'},
'ro': {'BVB'},
'ru': {},
'sa': {'SAU'},
'se': {'STO'},
'sg': {'SES'},
'sr': {},
'sw': {'EBS'},
'th': {'SET'},
'tr': {'IST'},
'tw': {'TAI', 'TWO'},
'us': {'ASE', 'BTS', 'CXI', 'NCM', 'NGM', 'NMS', 'NYQ', 'OEM', 'OQB', 'OQX', 'PCX', 'PNK', 'YHD'},
've': {'CCS'},
'vn': {},
'za': {'JNB'}
},
"sector": {
"Basic Materials", "Industrials", "Communication Services", "Healthcare",
"Real Estate", "Technology", "Energy", "Utilities", "Financial Services",
"Consumer Defensive", "Consumer Cyclical"
},
"industry": SECTOR_INDUSTY_MAPPING,
"peer_group": {
"US Fund Equity Energy",
"US CE Convertibles",
"EAA CE UK Large-Cap Equity",
"EAA CE Other",
"US Fund Financial",
"India CE Multi-Cap",
"US Fund Foreign Large Blend",
"US Fund Consumer Cyclical",
"EAA Fund Global Equity Income",
"China Fund Sector Equity Financial and Real Estate",
"US Fund Equity Precious Metals",
"EAA Fund RMB Bond - Onshore",
"China Fund QDII Greater China Equity",
"US Fund Large Growth",
"EAA Fund Germany Equity",
"EAA Fund Hong Kong Equity",
"EAA CE UK Small-Cap Equity",
"US Fund Natural Resources",
"US CE Preferred Stock",
"India Fund Sector - Financial Services",
"US Fund Diversified Emerging Mkts",
"EAA Fund South Africa & Namibia Equity",
"China Fund QDII Sector Equity",
"EAA CE Sector Equity Biotechnology",
"EAA Fund Switzerland Equity",
"US Fund Large Value",
"EAA Fund Asia ex-Japan Equity",
"US Fund Health",
"US Fund China Region",
"EAA Fund Emerging Europe ex-Russia Equity",
"EAA Fund Sector Equity Industrial Materials",
"EAA Fund Japan Large-Cap Equity",
"EAA Fund EUR Corporate Bond",
"US Fund Technology",
"EAA CE Global Large-Cap Blend Equity",
"Mexico Fund Mexico Equity",
"US Fund Trading--Leveraged Equity",
"EAA Fund Sector Equity Consumer Goods & Services",
"US Fund Large Blend",
"EAA Fund Global Flex-Cap Equity",
"EAA Fund EUR Aggressive Allocation - Global",
"EAA Fund China Equity",
"EAA Fund Global Large-Cap Growth Equity",
"US CE Options-based",
"EAA Fund Sector Equity Financial Services",
"EAA Fund Europe Large-Cap Blend Equity",
"EAA Fund China Equity - A Shares",
"EAA Fund USD Corporate Bond",
"EAA Fund Eurozone Large-Cap Equity",
"China Fund Aggressive Allocation Fund",
"EAA Fund Sector Equity Technology",
"EAA Fund Global Emerging Markets Equity",
"EAA Fund EUR Moderate Allocation - Global",
"EAA Fund Other Bond",
"EAA Fund Denmark Equity",
"EAA Fund US Large-Cap Blend Equity",
"India Fund Large-Cap",
"Paper & Forestry",
"Containers & Packaging",
"US Fund Miscellaneous Region",
"Energy Services",
"EAA Fund Other Equity",
"Homebuilders",
"Construction Materials",
"China Fund Equity Funds",
"Steel",
"Consumer Durables",
"EAA Fund Global Large-Cap Blend Equity",
"Transportation Infrastructure",
"Precious Metals",
"Building Products",
"Traders & Distributors",
"Electrical Equipment",
"Auto Components",
"Construction & Engineering",
"Aerospace & Defense",
"Refiners & Pipelines",
"Diversified Metals",
"Textiles & Apparel",
"Industrial Conglomerates",
"Household Products",
"Commercial Services",
"Food Retailers",
"Semiconductors",
"Media",
"Automobiles",
"Consumer Services",
"Technology Hardware",
"Transportation",
"Telecommunication Services",
"Oil & Gas Producers",
"Machinery",
"Retailing",
"Healthcare",
"Chemicals",
"Food Products",
"Diversified Financials",
"Real Estate",
"Insurance",
"Utilities",
"Pharmaceuticals",
"Software & Services",
"Banks"
}
}
# The valid regions are the country codes keying the exchange table (stored as a keys view).
EQUITY_SCREENER_EQ_MAP['region'] = EQUITY_SCREENER_EQ_MAP['exchange'].keys()
# Rebuild the map with 'region' first, followed by the other keys in their existing order.
ordered_keys = ['region']
ordered_keys += (name for name in EQUITY_SCREENER_EQ_MAP if name != 'region')
EQUITY_SCREENER_EQ_MAP = {name: EQUITY_SCREENER_EQ_MAP[name] for name in ordered_keys}
# Fund-screener counterpart of EQUITY_SCREENER_EQ_MAP: field -> region code -> set of exchange codes.
# NOTE(review): how the screener consumes this table is outside this view - confirm against the query code.
FUND_SCREENER_EQ_MAP = {
    "exchange": {
        'us': {'NAS'}
    }
}
# Screener field names shared by funds and equities: category -> set of field names.
# Merged into both FUND_SCREENER_FIELDS and EQUITY_SCREENER_FIELDS with merge_two_level_dicts.
COMMON_SCREENER_FIELDS = {
    "price":{
        "eodprice",
        "intradaypricechange",
        "intradayprice"
    },
    "eq_fields": {
        "exchange"},
}
# Fund-only screener field names: category -> set of field names.
FUND_SCREENER_FIELDS = {
    "eq_fields": {
        "categoryname",
        "performanceratingoverall",
        "initialinvestment",
        "annualreturnnavy1categoryrank",
        "riskratingoverall"}
}
# Add the shared fields: "eq_fields" becomes the union of both sets and "price" is added.
FUND_SCREENER_FIELDS = merge_two_level_dicts(FUND_SCREENER_FIELDS, COMMON_SCREENER_FIELDS)
# Equity-only screener field names: category -> set of field names.
# The shared COMMON_SCREENER_FIELDS are merged in by the statement that follows this literal.
EQUITY_SCREENER_FIELDS = {
    "eq_fields": {
        "region",
        "sector",
        "peer_group",
        "industry",
    },
    "price": {
        "lastclosemarketcap.lasttwelvemonths",
        "percentchange",
        "lastclose52weekhigh.lasttwelvemonths",
        "fiftytwowkpercentchange",
        "lastclose52weeklow.lasttwelvemonths",
        "intradaymarketcap",
    },
    "trading": {
        "beta",
        "avgdailyvol3m",
        "pctheldinsider",
        "pctheldinst",
        "dayvolume",
        "eodvolume",
    },
    "short_interest": {
        "short_percentage_of_shares_outstanding.value",
        "short_interest.value",
        "short_percentage_of_float.value",
        "days_to_cover_short.value",
        "short_interest_percentage_change.value",
    },
    "valuation": {
        "bookvalueshare.lasttwelvemonths",
        "lastclosemarketcaptotalrevenue.lasttwelvemonths",
        "lastclosetevtotalrevenue.lasttwelvemonths",
        "pricebookratio.quarterly",
        "peratio.lasttwelvemonths",
        "lastclosepricetangiblebookvalue.lasttwelvemonths",
        "lastclosepriceearnings.lasttwelvemonths",
        "pegratio_5y",
    },
    "profitability": {
        "consecutive_years_of_dividend_growth_count",
        "returnonassets.lasttwelvemonths",
        "returnonequity.lasttwelvemonths",
        "forward_dividend_per_share",
        "forward_dividend_yield",
        "returnontotalcapital.lasttwelvemonths",
    },
    "leverage": {
        "lastclosetevebit.lasttwelvemonths",
        "netdebtebitda.lasttwelvemonths",
        "totaldebtequity.lasttwelvemonths",
        "ltdebtequity.lasttwelvemonths",
        "ebitinterestexpense.lasttwelvemonths",
        "ebitdainterestexpense.lasttwelvemonths",
        "lastclosetevebitda.lasttwelvemonths",
        "totaldebtebitda.lasttwelvemonths",
    },
    "liquidity": {
        "quickratio.lasttwelvemonths",
        "altmanzscoreusingtheaveragestockinformationforaperiod.lasttwelvemonths",
        "currentratio.lasttwelvemonths",
        "operatingcashflowtocurrentliabilities.lasttwelvemonths",
    },
    "income_statement": {
        "totalrevenues.lasttwelvemonths",
        "netincomemargin.lasttwelvemonths",
        "grossprofit.lasttwelvemonths",
        "ebitda1yrgrowth.lasttwelvemonths",
        "dilutedepscontinuingoperations.lasttwelvemonths",
        "quarterlyrevenuegrowth.quarterly",
        "epsgrowth.lasttwelvemonths",
        "netincomeis.lasttwelvemonths",
        "ebitda.lasttwelvemonths",
        "dilutedeps1yrgrowth.lasttwelvemonths",
        "totalrevenues1yrgrowth.lasttwelvemonths",
        "operatingincome.lasttwelvemonths",
        "netincome1yrgrowth.lasttwelvemonths",
        "grossprofitmargin.lasttwelvemonths",
        "ebitdamargin.lasttwelvemonths",
        "ebit.lasttwelvemonths",
        "basicepscontinuingoperations.lasttwelvemonths",
        # These two were previously fused into one bogus name by a missing
        # comma (implicit string concatenation); they are separate fields.
        "netepsbasic.lasttwelvemonths",
        "netepsdiluted.lasttwelvemonths",
    },
    "balance_sheet": {
        "totalassets.lasttwelvemonths",
        "totalcommonsharesoutstanding.lasttwelvemonths",
        "totaldebt.lasttwelvemonths",
        "totalequity.lasttwelvemonths",
        "totalcurrentassets.lasttwelvemonths",
        "totalcashandshortterminvestments.lasttwelvemonths",
        "totalcommonequity.lasttwelvemonths",
        "totalcurrentliabilities.lasttwelvemonths",
        "totalsharesoutstanding",
    },
    "cash_flow": {
        "forward_dividend_yield",
        "leveredfreecashflow.lasttwelvemonths",
        "capitalexpenditure.lasttwelvemonths",
        "cashfromoperations.lasttwelvemonths",
        "leveredfreecashflow1yrgrowth.lasttwelvemonths",
        "unleveredfreecashflow.lasttwelvemonths",
        "cashfromoperations1yrgrowth.lasttwelvemonths",
    },
    "esg": {
        "esg_score",
        "environmental_score",
        "governance_score",
        "social_score",
        "highest_controversy",
    },
}
EQUITY_SCREENER_FIELDS = merge_two_level_dicts(EQUITY_SCREENER_FIELDS, COMMON_SCREENER_FIELDS)
USER_AGENTS = [
# Chrome
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36",
# Firefox
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:135.0) Gecko/20100101 Firefox/135.0",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14.7; rv:135.0) Gecko/20100101 Firefox/135.0",
"Mozilla/5.0 (X11; Linux i686; rv:135.0) Gecko/20100101 Firefox/135.0",
# Safari
"Mozilla/5.0 (Macintosh; Intel Mac OS X 14_7_4) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.3 Safari/605.1.15",
# Edge
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/133.0.0.0 Safari/537.36 Edg/131.0.2903.86"
]
@@ -0,0 +1,544 @@
import functools
from functools import lru_cache
import socket
import time as _time
from curl_cffi import requests
from urllib.parse import urlsplit, urljoin
from bs4 import BeautifulSoup
import datetime
from frozendict import frozendict
from . import utils, cache
from .config import YfConfig
import threading
from .exceptions import YFException, YFDataException, YFRateLimitError
def _is_transient_error(exception):
"""Check if error is transient (network/timeout) and should be retried."""
if isinstance(exception, (TimeoutError, socket.error, OSError)):
return True
error_type_name = type(exception).__name__
transient_error_types = {
'Timeout', 'TimeoutError', 'ConnectionError', 'ConnectTimeout',
'ReadTimeout', 'ChunkedEncodingError', 'RemoteDisconnected',
}
return error_type_name in transient_error_types
# Cache size limit; presumably passed as lru_cache(maxsize=...) elsewhere in this module - confirm at the use site.
cache_maxsize = 64
def lru_cache_freezeargs(func):
    """
    Decorator that converts dict and list arguments into immutable equivalents.

    Dicts become ``frozendict`` and lists become ``tuple`` (top level only,
    for both positional and keyword arguments) so that an ``lru_cache``-wrapped
    callable can hash them. ``func`` must already expose ``cache_info`` and
    ``cache_clear``; both are re-exported on the returned wrapper.
    """

    def _freeze(value):
        # dict -> frozendict, list -> tuple, anything else passes through.
        if isinstance(value, dict):
            return frozendict(value)
        if isinstance(value, list):
            return tuple(value)
        return value

    @functools.wraps(func)
    def wrapped(*args, **kwargs):
        frozen_args = tuple(_freeze(item) for item in args)
        frozen_kwargs = {name: _freeze(item) for name, item in kwargs.items()}
        return func(*frozen_args, **frozen_kwargs)

    # Keep the lru_cache helper methods reachable after this decorator is applied.
    wrapped.cache_info = func.cache_info
    wrapped.cache_clear = func.cache_clear
    return wrapped
class SingletonMeta(type):
    """
    Metaclass that hands out a single shared instance per class.

    The first call constructs the instance. Later calls return that same
    instance and, when a session is supplied (keyword ``session`` or first
    positional argument), forward it to the instance's ``_set_session``.
    """
    _instances = {}
    _lock = threading.Lock()

    def __call__(cls, *args, **kwargs):
        with cls._lock:
            registry = cls._instances
            if cls in registry:
                existing = registry[cls]
                # Update the existing instance; the keyword wins over the positional argument.
                if 'session' in kwargs:
                    existing._set_session(kwargs['session'])
                elif args:
                    existing._set_session(args[0])
                return existing

            registry[cls] = super().__call__(*args, **kwargs)
            return registry[cls]
class YfData(metaclass=SingletonMeta):
"""
Have one place to retrieve data from Yahoo API in order to ease caching and speed up operations.
Singleton means one session one cookie shared by all threads.
"""
def __init__(self, session=None):
    """Set up empty cookie/crumb state and install the HTTP session.

    ``session`` is used when given (and truthy); otherwise a new session
    impersonating Chrome is created.
    """
    self._crumb = None
    self._cookie = None

    # Start with the 'basic' cookie strategy; 'csrf' is the fallback if it fails.
    self._cookie_strategy = 'basic'

    # The lock must exist before _set_session runs, which takes it.
    self._cookie_lock = threading.Lock()

    self._session = None
    if not session:
        session = requests.Session(impersonate="chrome")
    self._set_session(session)
def _set_session(self, session):
if session is None:
return
try:
session.cache
except AttributeError:
# Not caching
self._session_is_caching = False
else:
# Is caching. This is annoying.
# Can't simply use a non-caching session to fetch cookie & crumb,
# because then the caching-session won't have cookie.
self._session_is_caching = True
# But since switch to curl_cffi, can't use requests_cache with it.
raise YFDataException("request_cache sessions don't work with curl_cffi, which is necessary now for Yahoo API. Solution: stop setting session, let YF handle.")
if not isinstance(session, requests.session.Session):
raise YFDataException(f"Yahoo API requires curl_cffi session not {type(session)}. Solution: stop setting session, let YF handle.")
with self._cookie_lock:
self._session = session
if YfConfig.network.proxy is not None:
self._session.proxies = YfConfig.network.proxy
def _set_cookie_strategy(self, strategy, have_lock=False):
if strategy == self._cookie_strategy:
return
if not have_lock:
self._cookie_lock.acquire()
try:
if self._cookie_strategy == 'csrf':
utils.get_yf_logger().debug(f'toggling cookie strategy {self._cookie_strategy} -> basic')
self._session.cookies.clear()
self._cookie_strategy = 'basic'
else:
utils.get_yf_logger().debug(f'toggling cookie strategy {self._cookie_strategy} -> csrf')
self._cookie_strategy = 'csrf'
self._cookie = None
self._crumb = None
except Exception:
self._cookie_lock.release()
raise
if not have_lock:
self._cookie_lock.release()
@utils.log_indent_decorator
def _save_cookie_curlCffi(self):
    """Store the session's Yahoo cookie in the persistent cookie cache.

    Returns True when exactly one Yahoo cookie domain could be identified
    and was stored under the 'curlCffi' key; returns False when there is no
    session, no cookie, no Yahoo domain, or more than one candidate domain.
    """
    if self._session is None:
        return False

    jar_cookies = self._session.cookies.jar._cookies
    if len(jar_cookies) == 0:
        return False

    candidates = [domain for domain in jar_cookies.keys() if 'yahoo' in domain]
    if len(candidates) > 1:
        # Possible when cookie fetched with CSRF method. Discard consent cookie.
        candidates = [domain for domain in candidates if 'consent' not in domain]
        if len(candidates) > 1:
            utils.get_yf_logger().debug(f'Multiple Yahoo cookies, not sure which to cache: {candidates}')
            return False
    if len(candidates) == 0:
        return False

    chosen = candidates[0]
    cache.get_cookie_cache().store('curlCffi', {chosen: jar_cookies[chosen]})
    return True
@utils.log_indent_decorator
def _load_cookie_curlCffi(self):
    """
    Restore the cached 'curlCffi' cookie into the session if it has not expired.

    Returns:
        bool: True when a cached, unexpired cookie was loaded into the session
        and remembered in ``self._cookie``; False otherwise.
    """
    if self._session is None:
        return False

    cached = cache.get_cookie_cache().lookup('curlCffi')
    if cached is None or len(cached) == 0:
        return False

    stored_cookies = cached['cookie']
    first_domain = list(stored_cookies.keys())[0]
    a3_cookie = stored_cookies[first_domain]['/']['A3']

    expires_at = a3_cookie.expires
    if expires_at > 2e9:
        # convert ms to s
        expires_at = expires_at // 1e3

    utc = datetime.timezone.utc
    if datetime.datetime.fromtimestamp(expires_at, tz=utc) < datetime.datetime.now(utc):
        utils.get_yf_logger().debug('cached cookie expired')
        return False

    self._session.cookies.jar._cookies.update(stored_cookies)
    self._cookie = a3_cookie
    return True
@utils.log_indent_decorator
def _get_cookie_basic(self, timeout=30):
    """
    Make sure a cookie is available using the 'basic' strategy.

    An in-memory cookie or a cached persistent cookie is reused when present;
    otherwise https://fc.yahoo.com is requested and the resulting cookie saved.

    Args:
        timeout: Timeout passed to the cookie request.

    Returns:
        bool: False when the request fails with a DNS error, True otherwise.
    """
    if self._cookie is not None:
        utils.get_yf_logger().debug('reusing cookie')
        return True
    if self._load_cookie_curlCffi():
        utils.get_yf_logger().debug('reusing persistent cookie')
        return True

    # To avoid infinite recursion, do NOT use self.get()
    # - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
    fetch_args = {
        'url': 'https://fc.yahoo.com',
        'timeout': timeout,
        'allow_redirects': True,
    }
    try:
        self._session.get(**fetch_args)
    except requests.exceptions.DNSError as dns_error:
        # Possible because url on some privacy/ad blocklists.
        # Can ignore because have second strategy.
        utils.get_yf_logger().debug("Handling DNS error on cookie fetch: " + str(dns_error))
        return False

    self._save_cookie_curlCffi()
    return True
@utils.log_indent_decorator
def _get_crumb_basic(self, timeout=30):
    """
    Return the crumb for the 'basic' strategy, fetching it when not cached.

    Only a crumb that passed validation is stored in ``self._crumb``; an error
    page or rate-limit message is never remembered, so a later call fetches
    again instead of "reusing" the error text.

    Args:
        timeout: Timeout used for the cookie and the crumb request.

    Returns:
        Optional[str]: The crumb, or None when no cookie or no usable crumb
        could be obtained.

    Raises:
        YFRateLimitError: When the crumb endpoint answers with HTTP 429 or a
            "Too Many Requests" body.
    """
    if self._crumb is not None:
        utils.get_yf_logger().debug('reusing crumb')
        return self._crumb

    if not self._get_cookie_basic(timeout):
        return None

    # - 'allow_redirects' copied from @psychoz971 solution - does it help USA?
    get_args = {
        'url': "https://query1.finance.yahoo.com/v1/test/getcrumb",
        'timeout': timeout,
        'allow_redirects': True
    }
    if self._session_is_caching:
        get_args['expire_after'] = self._expire_after
    crumb_response = self._session.get(**get_args)

    # Validate in a local variable first so a bad answer is never cached.
    crumb = crumb_response.text or ''
    if crumb_response.status_code == 429 or "Too Many Requests" in crumb:
        utils.get_yf_logger().debug(f"Didn't receive crumb {crumb}")
        raise YFRateLimitError()

    # An empty body is rejected too, matching _get_crumb_csrf().
    if crumb == '' or '<html>' in crumb:
        utils.get_yf_logger().debug("Didn't receive crumb")
        return None

    self._crumb = crumb
    utils.get_yf_logger().debug(f"crumb = '{self._crumb}'")
    return self._crumb
@utils.log_indent_decorator
def _get_cookie_and_crumb_basic(self, timeout):
    """
    Fetch cookie and crumb with the 'basic' strategy.

    Args:
        timeout: Timeout forwarded to both the cookie and the crumb step.

    Returns:
        The crumb from ``_get_crumb_basic()``, or None when no cookie is available.
    """
    have_cookie = self._get_cookie_basic(timeout)
    if have_cookie:
        return self._get_crumb_basic(timeout)
    return None
@utils.log_indent_decorator
def _get_cookie_csrf(self, timeout):
    """
    Make sure a cookie is available using the 'csrf' (consent form) strategy.

    The consent page is fetched, its ``csrfToken`` and ``sessionId`` inputs are
    read, and the consent is submitted so that the session receives its cookie.

    Args:
        timeout: Timeout used for every request of the flow.

    Returns:
        bool: True when a cookie is available (reused, loaded from the cache or
        obtained through the consent flow); False when the flow had to be
        abandoned so the caller can fall back to the other strategy.
    """
    if self._cookie is not None:
        utils.get_yf_logger().debug('reusing cookie')
        return True

    elif self._load_cookie_curlCffi():
        utils.get_yf_logger().debug('reusing persistent cookie')
        self._cookie = True
        return True

    base_args = {
        'timeout': timeout}

    get_args = {**base_args, 'url': 'https://guce.yahoo.com/consent'}
    if self._session_is_caching:
        get_args['expire_after'] = self._expire_after
    try:
        response = self._session.get(**get_args)
    except requests.exceptions.ChunkedEncodingError:
        # No idea why happens, but handle nicely so can switch to other cookie method.
        utils.get_yf_logger().debug('_get_cookie_csrf() encountering requests.exceptions.ChunkedEncodingError, aborting')
        return False

    soup = BeautifulSoup(response.content, 'html.parser')
    csrfTokenInput = soup.find('input', attrs={'name': 'csrfToken'})
    if csrfTokenInput is None:
        utils.get_yf_logger().debug('Failed to find "csrfToken" in response')
        return False
    csrfToken = csrfTokenInput['value']
    utils.get_yf_logger().debug(f'csrfToken = {csrfToken}')

    sessionIdInput = soup.find('input', attrs={'name': 'sessionId'})
    if sessionIdInput is None:
        # Same treatment as a missing csrfToken: give up instead of crashing.
        utils.get_yf_logger().debug('Failed to find "sessionId" in response')
        return False
    sessionId = sessionIdInput['value']
    utils.get_yf_logger().debug(f"sessionId='{sessionId}")

    originalDoneUrl = 'https://finance.yahoo.com/'
    namespace = 'yahoo'
    data = {
        'agree': ['agree', 'agree'],
        'consentUUID': 'default',
        'sessionId': sessionId,
        'csrfToken': csrfToken,
        'originalDoneUrl': originalDoneUrl,
        'namespace': namespace,
    }
    post_args = {**base_args,
                 'url': f'https://consent.yahoo.com/v2/collectConsent?sessionId={sessionId}',
                 'data': data}
    get_args = {**base_args,
                'url': f'https://guce.yahoo.com/copyConsent?sessionId={sessionId}',
                'data': data}
    if self._session_is_caching:
        post_args['expire_after'] = self._expire_after
        get_args['expire_after'] = self._expire_after
    try:
        self._session.post(**post_args)
        self._session.get(**get_args)
    except requests.exceptions.ChunkedEncodingError:
        # No idea why happens, but handle nicely so can switch to other cookie method.
        utils.get_yf_logger().debug('_get_cookie_csrf() encountering requests.exceptions.ChunkedEncodingError, aborting')
        # The consent was not completed, so do not report (or cache) a cookie.
        return False

    self._cookie = True
    self._save_cookie_curlCffi()
    return True
@utils.log_indent_decorator
def _get_crumb_csrf(self, timeout=30):
    """
    Return the crumb for the 'csrf' strategy, fetching it when not cached.

    Only a crumb that passed validation is stored in ``self._crumb``; an error
    page or rate-limit message is never remembered.

    Args:
        timeout: Timeout used for the cookie flow and the crumb request.

    Returns:
        Optional[str]: The crumb, or None when no cookie or no usable crumb
        could be obtained.

    Raises:
        YFRateLimitError: When the crumb endpoint answers with HTTP 429 or a
            "Too Many Requests" body.
    """
    # Credit goes to @bot-unit #1729

    if self._crumb is not None:
        utils.get_yf_logger().debug('reusing crumb')
        return self._crumb

    if not self._get_cookie_csrf(timeout):
        # This cookie stored in session
        return None

    get_args = {
        'url': 'https://query2.finance.yahoo.com/v1/test/getcrumb',
        'timeout': timeout}
    if self._session_is_caching:
        get_args['expire_after'] = self._expire_after
    r = self._session.get(**get_args)

    # Validate in a local variable first so a bad answer is never cached.
    crumb = r.text or ''
    if r.status_code == 429 or "Too Many Requests" in crumb:
        utils.get_yf_logger().debug(f"Didn't receive crumb {crumb}")
        raise YFRateLimitError()

    if crumb == '' or '<html>' in crumb:
        utils.get_yf_logger().debug("Didn't receive crumb")
        return None

    self._crumb = crumb
    utils.get_yf_logger().debug(f"crumb = '{self._crumb}'")
    return self._crumb
@utils.log_indent_decorator
def _get_cookie_and_crumb(self, timeout=30):
    """
    Obtain a crumb with the active cookie strategy, falling back to the other one.

    The whole operation runs while holding ``self._cookie_lock``. When the
    active strategy yields no crumb, the strategy is toggled and the other
    strategy is tried once.

    Args:
        timeout: Timeout forwarded to every cookie/crumb request, for both
            strategies.

    Returns:
        tuple: ``(crumb, strategy)`` where ``crumb`` may be None and
        ``strategy`` is the strategy that is active afterwards.
    """
    crumb, strategy = None, None

    utils.get_yf_logger().debug(f"cookie_mode = '{self._cookie_strategy}'")

    with self._cookie_lock:
        if self._cookie_strategy == 'csrf':
            crumb = self._get_crumb_csrf(timeout)
            if crumb is None:
                # Fail
                self._set_cookie_strategy('basic', have_lock=True)
                crumb = self._get_cookie_and_crumb_basic(timeout)
        else:
            # Fallback strategy
            crumb = self._get_cookie_and_crumb_basic(timeout)
            if crumb is None:
                # Fail
                self._set_cookie_strategy('csrf', have_lock=True)
                crumb = self._get_crumb_csrf(timeout)
        strategy = self._cookie_strategy
    return crumb, strategy
@utils.log_indent_decorator
def get(self, url, params=None, timeout=30):
    """
    Send a GET request and accept the cookie-consent form if redirected to it.

    Args:
        url: URL to request.
        params: Optional query parameters.
        timeout: Request timeout.

    Returns:
        The response, or the response obtained after submitting the consent form.
    """
    reply = self._make_request(url, request_method=self._session.get, params=params, timeout=timeout)

    # Accept cookie-consent if redirected to consent page
    if self._is_this_consent_url(reply.url):
        reply = self._accept_consent_form(reply, timeout)
    return reply
@utils.log_indent_decorator
def post(self, url, body=None, params=None, timeout=30, data=None):
    """
    Send a POST request through ``_make_request()``.

    Args:
        url: URL to request.
        body: Optional payload forwarded as ``body``.
        params: Optional query parameters.
        timeout: Request timeout.
        data: Optional payload forwarded as ``data``.

    Returns:
        The response returned by ``_make_request()``.
    """
    forwarded = {'body': body, 'params': params, 'timeout': timeout, 'data': data}
    return self._make_request(url, request_method=self._session.post, **forwarded)
@utils.log_indent_decorator
def _make_request(self, url, request_method, body=None, params=None, timeout=30, data=None):
    """
    Perform a request with crumb handling, transient-error retries and a
    one-off retry with the other cookie strategy.

    Args:
        url (str): URL to request.
        request_method: Callable performing the request (e.g. the session's
            ``get`` or ``post``); called with keyword arguments.
        body: Optional payload sent as ``json``.
        params (dict): Optional query parameters. Must not contain 'crumb'.
        timeout: Timeout for the request and for cookie/crumb retrieval.
        data: Optional payload sent as ``data`` with a JSON content-type header.

    Returns:
        The response object returned by ``request_method``.

    Raises:
        YFException: If the caller put 'crumb' into ``params``.
        YFRateLimitError: If the response after the strategy retry is HTTP 429.
    """
    # Important: treat input arguments as immutable.

    logger = utils.get_yf_logger()
    if len(url) > 200:
        logger.debug(f'url={url[:200]}...')
    else:
        logger.debug(f'url={url}')
    logger.debug(f'params={params}')

    # sync with config
    self._session.proxies = YfConfig.network.proxy

    if params is None:
        params = {}
    if 'crumb' in params:
        raise YFException("Don't manually add 'crumb' to params dict, let data.py handle it")

    crumb, strategy = self._get_cookie_and_crumb(timeout)
    crumbs = {'crumb': crumb} if crumb is not None else {}

    request_args = {
        'url': url,
        # Fresh dict: the caller's params are never modified.
        'params': {**params, **crumbs},
        'timeout': timeout
    }

    if body:
        request_args['json'] = body

    if data:
        request_args['data'] = data
        request_args['headers'] = {"Content-Type": "application/json"}

    for attempt in range(YfConfig.network.retries + 1):
        try:
            response = request_method(**request_args)
            break
        except Exception as e:
            if _is_transient_error(e) and attempt < YfConfig.network.retries:
                # Exponential back-off: 1s, 2s, 4s, ...
                _time.sleep(2 ** attempt)
            else:
                raise

    logger.debug(f'response code={response.status_code}')
    if response.status_code >= 400:
        # Retry with other cookie strategy
        if strategy == 'basic':
            self._set_cookie_strategy('csrf')
        else:
            self._set_cookie_strategy('basic')
        crumb, strategy = self._get_cookie_and_crumb(timeout)
        if crumb is not None:
            request_args['params']['crumb'] = crumb
        else:
            # Same rule as for the first attempt: no crumb means no parameter.
            request_args['params'].pop('crumb', None)
        response = request_method(**request_args)
        logger.debug(f'response code={response.status_code}')

        # Raise exception if rate limited
        if response.status_code == 429:
            raise YFRateLimitError()

    return response
@lru_cache_freezeargs
@lru_cache(maxsize=cache_maxsize)
def cache_get(self, url, params=None, timeout=30):
    """
    Cached variant of ``get()``: the call is wrapped by ``lru_cache``.

    Args:
        url: URL to request.
        params: Optional query parameters.
        timeout: Request timeout.

    Returns:
        The response returned by ``get()``.
    """
    return self.get(url, params=params, timeout=timeout)
def get_raw_json(self, url, params=None, timeout=30):
    """
    GET ``url`` and return the decoded JSON body.

    Args:
        url: URL to request.
        params: Optional query parameters.
        timeout: Request timeout.

    Returns:
        The result of ``response.json()``.

    Raises:
        Whatever ``response.raise_for_status()`` raises for an error response.
    """
    utils.get_yf_logger().debug(f'get_raw_json(): {url}')
    reply = self.get(url, params=params, timeout=timeout)
    reply.raise_for_status()
    payload = reply.json()
    return payload
def _is_this_consent_url(self, response_url: str) -> bool:
"""
Check if given response_url is consent page
Args:
response_url (str) : response.url
Returns:
True : This is cookie-consent page
False : This is not cookie-consent page
"""
try:
return urlsplit(response_url).hostname and urlsplit(
response_url
).hostname.endswith("consent.yahoo.com")
except Exception:
return False
def _accept_consent_form(
    self, consent_resp: requests.Response, timeout: int
) -> requests.Response:
    """
    Submit the cookie-consent form found in ``consent_resp`` with consent given.

    Args:
        consent_resp (requests.Response) : Response instance of cookie-consent page
        timeout (int) : Timeout passed to the form submission

    Returns:
        response (requests.Response) : Response received after posting the
        form, or ``consent_resp`` itself when the page contains no form.
    """
    page = BeautifulSoup(consent_resp.text, "html.parser")

    # Heuristic: pick the first form; Yahoo's CMP tends to have a single form for consent
    consent_form = page.find("form")
    if not consent_form:
        return consent_resp

    # Target of the submission: the form's action, resolved against the page URL
    target = urljoin(consent_resp.url, consent_form.get("action") or consent_resp.url)

    # Collect inputs (hidden tokens, etc.), for example:
    #   <input name="csrfToken" type="hidden" value="..."/>
    #   <input name="sessionId" type="hidden" value="..."/>
    #   <input name="originalDoneUrl" type="hidden" value="..."/>
    #   <input name="namespace" type="hidden" value="yahoo"/>
    payload = {}
    for field in consent_form.find_all("input"):
        field_name = field.get("name")
        if not field_name:
            continue

        kind = (field.get("type") or "text").lower()
        value = field.get("value") or ""

        if kind not in ("checkbox", "radio"):
            payload[field_name] = value
            continue

        # If it's clearly an "agree"/"accept" field or already checked, include it
        lowered_name = field_name.lower()
        if "agree" in lowered_name or "accept" in lowered_name or field.has_attr("checked"):
            payload[field_name] = value or "1"

    # If no explicit agree/accept in inputs, add a best-effort flag
    has_consent_flag = any(
        "agree" in key.lower() or "accept" in key.lower() for key in payload
    )
    if not has_consent_flag:
        payload["agree"] = "1"

    # Submit the form with "Referer". Some servers check this header as a simple CSRF protection measure.
    return self._session.post(
        target,
        data=payload,
        headers={"Referer": consent_resp.url},
        timeout=timeout,
        allow_redirects=True,
    )
@@ -0,0 +1,5 @@
# domain/__init__.py
from .sector import Sector
from .industry import Industry
__all__ = ['Sector', 'Industry']
@@ -0,0 +1,195 @@
from abc import ABC, abstractmethod
import pandas as _pd
from typing import Dict, List, Optional
from ..const import _QUERY1_URL_
from ..data import YfData
from ..ticker import Ticker
_QUERY_URL_ = f'{_QUERY1_URL_}/v1/finance'
class Domain(ABC):
    """
    Abstract base class representing a domain entity in financial data, with key attributes
    and methods for fetching and parsing data. Derived classes must implement the `_fetch_and_parse()` method.

    Data is fetched lazily: each public property triggers `_fetch_and_parse()` when the
    value it exposes is still None.
    """

    def __init__(self, key: str, session=None):
        """
        Initializes the Domain object with a key, session.

        Args:
            key (str): Unique key identifying the domain entity.
            session (Optional[requests.Session]): Session object for HTTP requests. Defaults to None.
        """
        self._key: str = key
        self.session = session
        self._data: YfData = YfData(session=session)

        # Filled in by _parse_and_assign_common()
        self._name: Optional[str] = None
        self._symbol: Optional[str] = None
        self._overview: Optional[Dict] = None
        self._top_companies: Optional[_pd.DataFrame] = None
        self._research_reports: Optional[List[Dict[str, str]]] = None

    @property
    def key(self) -> str:
        """
        Retrieves the key of the domain entity.

        Returns:
            str: The unique key of the domain entity.
        """
        return self._key

    @property
    def name(self) -> str:
        """
        Retrieves the name of the domain entity.

        Returns:
            str: The name of the domain entity.
        """
        self._ensure_fetched(self._name)
        return self._name

    @property
    def symbol(self) -> str:
        """
        Retrieves the symbol of the domain entity.

        Returns:
            str: The symbol representing the domain entity.
        """
        self._ensure_fetched(self._symbol)
        return self._symbol

    @property
    def ticker(self) -> Ticker:
        """
        Retrieves a Ticker object based on the domain entity's symbol.

        Returns:
            Ticker: A Ticker object associated with the domain entity.
        """
        self._ensure_fetched(self._symbol)
        return Ticker(self._symbol)

    @property
    def overview(self) -> Dict:
        """
        Retrieves the overview information of the domain entity.

        Returns:
            Dict: A dictionary containing an overview of the domain entity.
        """
        self._ensure_fetched(self._overview)
        return self._overview

    @property
    def top_companies(self) -> Optional[_pd.DataFrame]:
        """
        Retrieves the top companies within the domain entity.

        Returns:
            Optional[pandas.DataFrame]: A DataFrame containing the top companies in the
            domain, or None when no companies are available.
        """
        self._ensure_fetched(self._top_companies)
        return self._top_companies

    @property
    def research_reports(self) -> List[Dict[str, str]]:
        """
        Retrieves research reports related to the domain entity.

        Returns:
            List[Dict[str, str]]: A list of research reports, where each report is a dictionary with metadata.
        """
        self._ensure_fetched(self._research_reports)
        return self._research_reports

    def _fetch(self, query_url) -> Dict:
        """
        Fetches data from the given query URL.

        Args:
            query_url (str): The URL used for the data query.

        Returns:
            Dict: The JSON response data from the request.
        """
        params_dict = {"formatted": "true", "withReturns": "true", "lang": "en-US", "region": "US"}
        result = self._data.get_raw_json(query_url, params=params_dict)
        return result

    def _parse_and_assign_common(self, data) -> None:
        """
        Parses and assigns common data fields such as name, symbol, overview, and top companies.

        Args:
            data (Dict): The raw data received from the API.
        """
        self._name = data.get('name')
        self._symbol = data.get('symbol')
        # The parsers accept None, so a missing key and an explicit null behave alike.
        self._overview = self._parse_overview(data.get('overview'))
        self._top_companies = self._parse_top_companies(data.get('topCompanies'))
        self._research_reports = data.get('researchReports')

    def _parse_overview(self, overview) -> Dict:
        """
        Parses the overview data for the domain entity.

        Args:
            overview (Optional[Dict]): The raw overview data; None is treated as empty.

        Returns:
            Dict: A dictionary containing parsed overview information. Entries that
            are absent (or null) in the raw data are None.
        """
        overview = overview or {}

        def raw(field):
            # These fields are nested dicts; only their 'raw' entry is kept.
            return (overview.get(field) or {}).get('raw', None)

        return {
            "companies_count": overview.get('companiesCount', None),
            "market_cap": raw('marketCap'),
            "message_board_id": overview.get('messageBoardId', None),
            "description": overview.get('description', None),
            "industries_count": overview.get('industriesCount', None),
            "market_weight": raw('marketWeight'),
            "employee_count": raw('employeeCount')
        }

    def _parse_top_companies(self, top_companies) -> Optional[_pd.DataFrame]:
        """
        Parses the top companies data and converts it into a pandas DataFrame.

        Args:
            top_companies: Iterable of raw company dicts; None is treated as empty.

        Returns:
            Optional[pandas.DataFrame]: A DataFrame indexed by symbol, or None if no data is available.
        """
        top_companies_column = ['symbol', 'name', 'rating', 'market weight']
        top_companies_values = [(c.get('symbol'),
                                 c.get('name'),
                                 c.get('rating'),
                                 (c.get('marketWeight') or {}).get('raw', None))
                                for c in (top_companies or [])]

        if not top_companies_values:
            return None

        return _pd.DataFrame(top_companies_values, columns=top_companies_column).set_index('symbol')

    @abstractmethod
    def _fetch_and_parse(self) -> None:
        """
        Abstract method for fetching and parsing domain-specific data.
        Must be implemented by derived classes.
        """
        raise NotImplementedError("_fetch_and_parse() needs to be implemented by children classes")

    def _ensure_fetched(self, attribute) -> None:
        """
        Ensures that the given attribute is fetched by calling `_fetch_and_parse()` if the attribute is None.

        Args:
            attribute: The attribute to check and potentially fetch.
        """
        if attribute is None:
            self._fetch_and_parse()
@@ -0,0 +1,153 @@
from __future__ import print_function
import pandas as _pd
from typing import Dict, Optional
from .. import utils
from ..config import YfConfig
from ..data import YfData
from .domain import Domain, _QUERY_URL_
class Industry(Domain):
    """
    Represents an industry within a sector.
    """

    def __init__(self, key, session=None):
        """
        Args:
            key (str): The key identifier for the industry.
            session (optional): The session to use for requests.
        """
        # Domain.__init__ already creates the YfData object for this session.
        super(Industry, self).__init__(key, session)
        self._query_url = f'{_QUERY_URL_}/industries/{self._key}'

        self._sector_key = None
        self._sector_name = None
        self._top_performing_companies = None
        self._top_growth_companies = None

    def __repr__(self):
        """
        Returns a string representation of the Industry instance.

        Returns:
            str: String representation of the Industry instance.
        """
        return f'yfinance.Industry object <{self._key}>'

    @property
    def sector_key(self) -> str:
        """
        Returns the sector key of the industry.

        Returns:
            str: The sector key.
        """
        self._ensure_fetched(self._sector_key)
        return self._sector_key

    @property
    def sector_name(self) -> str:
        """
        Returns the sector name of the industry.

        Returns:
            str: The sector name.
        """
        self._ensure_fetched(self._sector_name)
        return self._sector_name

    @property
    def top_performing_companies(self) -> Optional[_pd.DataFrame]:
        """
        Returns the top performing companies in the industry.

        Returns:
            Optional[pd.DataFrame]: DataFrame containing top performing companies.
        """
        self._ensure_fetched(self._top_performing_companies)
        return self._top_performing_companies

    @property
    def top_growth_companies(self) -> Optional[_pd.DataFrame]:
        """
        Returns the top growth companies in the industry.

        Returns:
            Optional[pd.DataFrame]: DataFrame containing top growth companies.
        """
        self._ensure_fetched(self._top_growth_companies)
        return self._top_growth_companies

    def _parse_top_performing_companies(self, top_performing_companies: Dict) -> Optional[_pd.DataFrame]:
        """
        Parses the top performing companies data.

        Args:
            top_performing_companies: Iterable of raw company dicts; None is treated as empty.

        Returns:
            Optional[pd.DataFrame]: DataFrame indexed by symbol, or None when there are no companies.
        """
        companies_column = ['symbol', 'name', 'ytd return', 'last price', 'target price']
        # "or {}" / "or []" make an explicit null behave like a missing entry.
        companies_values = [(c.get('symbol', None),
                             c.get('name', None),
                             (c.get('ytdReturn') or {}).get('raw', None),
                             (c.get('lastPrice') or {}).get('raw', None),
                             (c.get('targetPrice') or {}).get('raw', None),)
                            for c in (top_performing_companies or [])]

        if not companies_values:
            return None

        return _pd.DataFrame(companies_values, columns=companies_column).set_index('symbol')

    def _parse_top_growth_companies(self, top_growth_companies: Dict) -> Optional[_pd.DataFrame]:
        """
        Parses the top growth companies data.

        Args:
            top_growth_companies: Iterable of raw company dicts; None is treated as empty.

        Returns:
            Optional[pd.DataFrame]: DataFrame indexed by symbol, or None when there are no companies.
        """
        companies_column = ['symbol', 'name', 'ytd return', 'growth estimate']
        companies_values = [(c.get('symbol', None),
                             c.get('name', None),
                             (c.get('ytdReturn') or {}).get('raw', None),
                             (c.get('growthEstimate') or {}).get('raw', None),)
                            for c in (top_growth_companies or [])]

        if not companies_values:
            return None

        return _pd.DataFrame(companies_values, columns=companies_column).set_index('symbol')

    def _fetch_and_parse(self) -> None:
        """
        Fetches and parses the industry data.

        On failure the exception is re-raised unless
        ``YfConfig.debug.hide_exceptions`` is set, in which case it is logged.
        """
        result = None

        try:
            result = self._fetch(self._query_url)
            data = result['data']
            self._parse_and_assign_common(data)

            self._sector_key = data.get('sectorKey')
            self._sector_name = data.get('sectorName')
            self._top_performing_companies = self._parse_top_performing_companies(data.get('topPerformingCompanies'))
            self._top_growth_companies = self._parse_top_growth_companies(data.get('topGrowthCompanies'))

            # NOTE(review): the raw payload is returned on success although the
            # annotation says None; kept in case a caller relies on it.
            return result
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            logger = utils.get_yf_logger()
            logger.error(f"Failed to get industry data for '{self._key}' reason: {e}")
            logger.debug("Got response: ")
            logger.debug("-------------")
            logger.debug(f" {result}")
            logger.debug("-------------")
@@ -0,0 +1,107 @@
import datetime as dt
import json as _json
from ..config import YfConfig
from ..const import _QUERY1_URL_
from ..data import utils, YfData
from ..exceptions import YFDataException
class Market:
    """
    Lazily fetched status and summary information for one market.
    """

    def __init__(self, market:'str', session=None, timeout=30):
        """
        Args:
            market (str): Market identifier sent as the ``market`` query parameter.
            session (optional): Session to use for requests.
            timeout: Timeout used for every request.
        """
        self.market = market
        self.session = session
        self.timeout = timeout

        self._data = YfData(session=self.session)

        self._logger = utils.get_yf_logger()

        self._status = None
        self._summary = None

    def _fetch_json(self, url, params):
        """
        Fetch ``url`` through the cached getter and decode the JSON body.

        Returns:
            The decoded JSON, or an empty dict when decoding fails and
            ``YfConfig.debug.hide_exceptions`` is set.

        Raises:
            YFDataException: If there is no response or it contains "Will be right back".
        """
        data = self._data.cache_get(url=url, params=params, timeout=self.timeout)
        if data is None or "Will be right back" in data.text:
            raise YFDataException("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***")
        try:
            return data.json()
        except _json.JSONDecodeError:
            if not YfConfig.debug.hide_exceptions:
                raise
            self._logger.error(f"{self.market}: Failed to retrieve market data and recieved faulty data.")
            return {}

    @staticmethod
    def _timezone_from_status(timezone_info):
        """
        Build a fixed-offset timezone from a market-time ``timezone`` entry.

        Args:
            timezone_info (dict): Mapping with a ``gmtoffset`` and a ``short`` name.

        Returns:
            datetime.timezone: Timezone named ``short`` with the given offset.
        """
        # NOTE(review): the previous code divided the offset by 1000, so it is
        # presumably expressed in milliseconds - confirm against a live response.
        offset = dt.timedelta(milliseconds=int(timezone_info["gmtoffset"]))
        return dt.timezone(offset, timezone_info["short"])

    def _parse_data(self):
        """
        Fetch and unpack market summary and status, unless both are already set.

        Each parsing step re-raises its exception unless
        ``YfConfig.debug.hide_exceptions`` is set, in which case it is logged.
        """
        # Fetch both to ensure they are at the same time
        if (self._status is not None) and (self._summary is not None):
            return

        self._logger.debug(f"{self.market}: Parsing market data")

        # Summary

        summary_url = f"{_QUERY1_URL_}/v6/finance/quote/marketSummary"
        summary_fields = ["shortName", "regularMarketPrice", "regularMarketChange", "regularMarketChangePercent"]
        summary_params = {
            "fields": ",".join(summary_fields),
            "formatted": False,
            "lang": "en-US",
            "market": self.market
        }

        status_url = f"{_QUERY1_URL_}/v6/finance/markettime"
        status_params = {
            "formatted": True,
            "key": "finance",
            "lang": "en-US",
            "market": self.market
        }

        self._summary = self._fetch_json(summary_url, summary_params)
        self._status = self._fetch_json(status_url, status_params)

        try:
            # Re-key the summary entries by their exchange
            self._summary = self._summary['marketSummaryResponse']['result']
            self._summary = {x['exchange']:x for x in self._summary}
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            self._logger.error(f"{self.market}: Failed to parse market summary")
            self._logger.debug(f"{type(e)}: {e}")

        try:
            # Unpack
            self._status = self._status['finance']['marketTimes'][0]['marketTime'][0]
            self._status['timezone'] = self._status['timezone'][0]
            del self._status['time']  # redundant
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            self._logger.error(f"{self.market}: Failed to parse market status")
            self._logger.debug(f"{type(e)}: {e}")
        try:
            self._status.update({
                "open": dt.datetime.fromisoformat(self._status["open"]),
                "close": dt.datetime.fromisoformat(self._status["close"]),
                "tz": self._timezone_from_status(self._status["timezone"])
            })
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            self._logger.error(f"{self.market}: Failed to update market status")
            self._logger.debug(f"{type(e)}: {e}")

    @property
    def status(self):
        """Market status, fetched on first access."""
        self._parse_data()
        return self._status

    @property
    def summary(self):
        """Market summary, fetched on first access."""
        self._parse_data()
        return self._summary
@@ -0,0 +1,152 @@
from __future__ import print_function
import pandas as _pd
from typing import Dict, Optional
from ..config import YfConfig
from ..const import SECTOR_INDUSTY_MAPPING_LC
from ..utils import dynamic_docstring, generate_list_table_from_dict, get_yf_logger
from .domain import Domain, _QUERY_URL_
class Sector(Domain):
    """
    A financial market sector: exposes the sector's top ETFs, top mutual funds
    and industries on top of the common domain data.
    """

    def __init__(self, key, session=None):
        """
        Args:
            key (str): The key representing the sector.
            session (requests.Session, optional): A session for making requests. Defaults to None.

        .. seealso::

            :attr:`Sector.industries <yfinance.Sector.industries>`
                Map of sector and industry
        """
        super().__init__(key, session)
        self._query_url: str = f'{_QUERY_URL_}/sectors/{self._key}'

        # Populated by _fetch_and_parse()
        self._industries: Optional[_pd.DataFrame] = None
        self._top_mutual_funds: Optional[Dict] = None
        self._top_etfs: Optional[Dict] = None

    def __repr__(self):
        """
        Returns:
            str: ``yfinance.Sector object <key>``.
        """
        return f'yfinance.Sector object <{self._key}>'

    @property
    def top_etfs(self) -> Dict[str, str]:
        """
        Top ETFs of the sector, fetched on first access.

        Returns:
            Dict[str, str]: ETF names keyed by symbol.
        """
        self._ensure_fetched(self._top_etfs)
        return self._top_etfs

    @property
    def top_mutual_funds(self) -> Dict[str, str]:
        """
        Top mutual funds of the sector, fetched on first access.

        Returns:
            Dict[str, str]: Mutual fund names keyed by symbol.
        """
        self._ensure_fetched(self._top_mutual_funds)
        return self._top_mutual_funds

    @dynamic_docstring({"sector_industry": generate_list_table_from_dict(SECTOR_INDUSTY_MAPPING_LC,bullets=True)})
    @property
    def industries(self) -> _pd.DataFrame:
        """
        Gets the industries within the sector.

        Returns:
            pandas.DataFrame: A DataFrame with industries' key, name, symbol, and market weight.

        {sector_industry}
        """
        self._ensure_fetched(self._industries)
        return self._industries

    def _parse_top_etfs(self, top_etfs: Dict) -> Dict[str, str]:
        """
        Map each raw ETF entry to ``symbol -> name``.

        Args:
            top_etfs (Dict): The raw ETF data from the API response.

        Returns:
            Dict[str, str]: ETF names keyed by symbol.
        """
        names_by_symbol = {}
        for entry in top_etfs:
            names_by_symbol[entry.get('symbol')] = entry.get('name')
        return names_by_symbol

    def _parse_top_mutual_funds(self, top_mutual_funds: Dict) -> Dict[str, str]:
        """
        Map each raw mutual fund entry to ``symbol -> name``.

        Args:
            top_mutual_funds (Dict): The raw mutual fund data from the API response.

        Returns:
            Dict[str, str]: Mutual fund names keyed by symbol.
        """
        names_by_symbol = {}
        for entry in top_mutual_funds:
            names_by_symbol[entry.get('symbol')] = entry.get('name')
        return names_by_symbol

    def _parse_industries(self, industries: Dict) -> _pd.DataFrame:
        """
        Turn the raw industry entries into a DataFrame indexed by industry key.

        Entries named 'All Industries' are left out.

        Args:
            industries (Dict): The raw industry data from the API response.

        Returns:
            pandas.DataFrame: Columns name, symbol and market weight, indexed by key.
        """
        rows = []
        for industry in industries:
            if industry.get('name') == 'All Industries':
                continue
            rows.append((
                industry.get('key'),
                industry.get('name'),
                industry.get('symbol'),
                industry.get('marketWeight', {}).get('raw', None),
            ))
        frame = _pd.DataFrame(rows, columns=['key', 'name', 'symbol', 'market weight'])
        return frame.set_index('key')

    def _fetch_and_parse(self) -> None:
        """
        Fetch the sector data and populate the common and sector-specific attributes.

        Stores the parsed data in `_top_etfs`, `_top_mutual_funds` and `_industries`.

        Raises:
            Exception: If fetching or parsing fails and
                ``YfConfig.debug.hide_exceptions`` is not set; otherwise the
                failure is logged.
        """
        result = None

        try:
            result = self._fetch(self._query_url)
            payload = result['data']
            self._parse_and_assign_common(payload)

            self._top_etfs = self._parse_top_etfs(payload.get('topETFs', {}))
            self._top_mutual_funds = self._parse_top_mutual_funds(payload.get('topMutualFunds', {}))
            self._industries = self._parse_industries(payload.get('industries', {}))

        except Exception as error:
            if not YfConfig.debug.hide_exceptions:
                raise
            log = get_yf_logger()
            log.error(f"Failed to get sector data for '{self._key}' reason: {error}")
            log.debug("Got response: ")
            log.debug("-------------")
            log.debug(f" {result}")
            log.debug("-------------")
@@ -0,0 +1,53 @@
class YFException(Exception):
    """Base class of the yfinance exceptions that derive from it."""

    def __init__(self, description=""):
        super().__init__(description)


class YFDataException(YFException):
    """Generic data-related failure."""


class YFNotImplementedError(NotImplementedError):
    """A fetch method that has not been implemented; derives from NotImplementedError."""

    def __init__(self, method_name):
        message = f"Have not implemented fetching '{method_name}' from Yahoo API"
        super().__init__(message)


class YFTickerMissingError(YFException):
    """A ticker that is possibly delisted; keeps ``ticker`` and ``rationale``."""

    def __init__(self, ticker, rationale):
        self.ticker = ticker
        self.rationale = rationale
        super().__init__(f"${ticker}: possibly delisted; {rationale}")


class YFTzMissingError(YFTickerMissingError):
    """No timezone was found for the ticker."""

    def __init__(self, ticker):
        super().__init__(ticker, "no timezone found")


class YFPricesMissingError(YFTickerMissingError):
    """No price data was found for the ticker; keeps ``debug_info``."""

    def __init__(self, ticker, debug_info):
        self.debug_info = debug_info
        rationale = "no price data found"
        if debug_info != '':
            # Append the extra details only when there are any
            rationale = f"{rationale} {debug_info}"
        super().__init__(ticker, rationale)


class YFEarningsDateMissing(YFTickerMissingError):
    """No earnings dates were found for the ticker."""

    # note that this does not get raised. Added in case of raising it in the future
    def __init__(self, ticker):
        super().__init__(ticker, "no earnings dates found")


class YFInvalidPeriodError(YFException):
    """An invalid period was requested; keeps ticker, period and valid ranges."""

    def __init__(self, ticker, invalid_period, valid_ranges):
        self.ticker = ticker
        self.invalid_period = invalid_period
        self.valid_ranges = valid_ranges
        message = (
            f"{self.ticker}: Period '{invalid_period}' is invalid, "
            f"must be one of: {valid_ranges}"
        )
        super().__init__(message)


class YFRateLimitError(YFException):
    """Yahoo answered with "Too Many Requests"."""

    def __init__(self):
        super().__init__("Too Many Requests. Rate limited. Try after a while.")
@@ -0,0 +1,350 @@
import asyncio
import base64
import json
from typing import List, Optional, Callable, Union
from websockets.sync.client import connect as sync_connect
from websockets.asyncio.client import connect as async_connect
from yfinance import utils
from yfinance.config import YfConfig
from yfinance.pricing_pb2 import PricingData
from google.protobuf.json_format import MessageToDict
class BaseWebSocket:
    """
    State and message decoding shared by the WebSocket clients.
    """

    def __init__(self, url: str = "wss://streamer.finance.yahoo.com/?version=2", verbose=True):
        """
        Args:
            url (str): The WebSocket server URL.
            verbose (bool): Flag to enable or disable print statements.
        """
        self.url = url
        self.verbose = verbose
        self.logger = utils.get_yf_logger()
        self._ws = None
        self._subscriptions = set()
        self._subscription_interval = 15  # seconds

    def _decode_message(self, base64_message: str) -> dict:
        """
        Decode a base64-encoded ``PricingData`` protobuf message into a dict.

        Args:
            base64_message (str): The base64 text of one message.

        Returns:
            dict: The decoded message. When decoding fails and
            ``YfConfig.debug.hide_exceptions`` is set, a dict with the keys
            'error' and 'raw_base64' instead.

        Raises:
            Exception: The decoding error, unless
                ``YfConfig.debug.hide_exceptions`` is set.
        """
        try:
            decoded_bytes = base64.b64decode(base64_message)
            pricing_data = PricingData()
            pricing_data.ParseFromString(decoded_bytes)
            return MessageToDict(pricing_data, preserving_proto_field_name=True)
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            self.logger.error("Failed to decode message: %s", e, exc_info=True)
            if self.verbose:
                # print() does not apply %-style arguments, so format explicitly.
                print(f"Failed to decode message: {e}")
            return {
                'error': str(e),
                'raw_base64': base64_message
            }
class AsyncWebSocket(BaseWebSocket):
"""
Asynchronous WebSocket client for streaming real time pricing data.
"""
def __init__(self, url: str = "wss://streamer.finance.yahoo.com/?version=2", verbose=True):
    """
    Create the asynchronous client; no connection is opened yet.

    Args:
        url (str): The WebSocket server URL. Defaults to Yahoo Finance's WebSocket URL.
        verbose (bool): Flag to enable or disable print statements. Defaults to True.
    """
    super().__init__(url, verbose)
    # Task to send heartbeat subscribe
    self._heartbeat_task = None
    # Callable to handle messages
    self._message_handler = None
async def _connect(self):
try:
if self._ws is None:
self._ws = await async_connect(self.url)
self.logger.info("Connected to WebSocket.")
if self.verbose:
print("Connected to WebSocket.")
except Exception as e:
if not YfConfig.debug.hide_exceptions:
raise
self.logger.error("Failed to connect to WebSocket: %s", e, exc_info=True)
if self.verbose:
print(f"Failed to connect to WebSocket: {e}")
self._ws = None
raise
async def _periodic_subscribe(self):
while True:
try:
await asyncio.sleep(self._subscription_interval)
if self._subscriptions:
message = {"subscribe": list(self._subscriptions)}
await self._ws.send(json.dumps(message))
if self.verbose:
print(f"Heartbeat subscription sent for symbols: {self._subscriptions}")
except Exception as e:
if not YfConfig.debug.hide_exceptions:
raise
self.logger.error("Error in heartbeat subscription: %s", e, exc_info=True)
if self.verbose:
print(f"Error in heartbeat subscription: {e}")
break
async def subscribe(self, symbols: Union[str, List[str]]):
"""
Subscribe to a stock symbol or a list of stock symbols.
Args:
symbols (Union[str, List[str]]): Stock symbol(s) to subscribe to.
"""
await self._connect()
if isinstance(symbols, str):
symbols = [symbols]
self._subscriptions.update(symbols)
message = {"subscribe": list(self._subscriptions)}
await self._ws.send(json.dumps(message))
# Start heartbeat subscription task
if self._heartbeat_task is None:
self._heartbeat_task = asyncio.create_task(self._periodic_subscribe())
self.logger.info(f"Subscribed to symbols: {symbols}")
if self.verbose:
print(f"Subscribed to symbols: {symbols}")
async def unsubscribe(self, symbols: Union[str, List[str]]):
"""
Unsubscribe from a stock symbol or a list of stock symbols.
Args:
symbols (Union[str, List[str]]): Stock symbol(s) to unsubscribe from.
"""
await self._connect()
if isinstance(symbols, str):
symbols = [symbols]
self._subscriptions.difference_update(symbols)
message = {"unsubscribe": symbols}
await self._ws.send(json.dumps(message))
self.logger.info(f"Unsubscribed from symbols: {symbols}")
if self.verbose:
print(f"Unsubscribed from symbols: {symbols}")
async def listen(self, message_handler=None):
"""
Start listening to messages from the WebSocket server.
Args:
message_handler (Optional[Callable[[dict], None]]): Optional function to handle received messages.
"""
await self._connect()
self._message_handler = message_handler
self.logger.info("Listening for messages...")
if self.verbose:
print("Listening for messages...")
# Start heartbeat subscription task
if self._heartbeat_task is None:
self._heartbeat_task = asyncio.create_task(self._periodic_subscribe())
while True:
try:
async for message in self._ws:
message_json = json.loads(message)
encoded_data = message_json.get("message", "")
decoded_message = self._decode_message(encoded_data)
if self._message_handler:
try:
if asyncio.iscoroutinefunction(self._message_handler):
await self._message_handler(decoded_message)
else:
self._message_handler(decoded_message)
except Exception as handler_exception:
if not YfConfig.debug.hide_exceptions:
raise
self.logger.error("Error in message handler: %s", handler_exception, exc_info=True)
if self.verbose:
print("Error in message handler:", handler_exception)
else:
print(decoded_message)
except (KeyboardInterrupt, asyncio.CancelledError):
self.logger.info("WebSocket listening interrupted. Closing connection...")
if self.verbose:
print("WebSocket listening interrupted. Closing connection...")
await self.close()
break
except Exception as e:
if not YfConfig.debug.hide_exceptions:
raise
self.logger.error("Error while listening to messages: %s", e, exc_info=True)
if self.verbose:
print("Error while listening to messages: %s", e)
# Attempt to reconnect if connection drops
self.logger.info("Attempting to reconnect...")
if self.verbose:
print("Attempting to reconnect...")
await asyncio.sleep(3) # backoff
await self._connect()
async def close(self):
"""Close the WebSocket connection."""
if self._heartbeat_task:
self._heartbeat_task.cancel()
if self._ws is not None: # and not self._ws.closed:
await self._ws.close()
self.logger.info("WebSocket connection closed.")
if self.verbose:
print("WebSocket connection closed.")
async def __aenter__(self):
await self._connect()
return self
async def __aexit__(self, exc_type, exc_value, traceback):
await self.close()
class WebSocket(BaseWebSocket):
    """
    Synchronous WebSocket client for streaming real time pricing data.
    """

    def __init__(self, url: str = "wss://streamer.finance.yahoo.com/?version=2", verbose=True):
        """
        Initialize the WebSocket client.

        Args:
            url (str): The WebSocket server URL. Defaults to Yahoo Finance's WebSocket URL.
            verbose (bool): Flag to enable or disable print statements. Defaults to True.
        """
        super().__init__(url, verbose)

    def _connect(self):
        """
        Open the connection unless one is already stored on the instance.

        Raises:
            Exception: Any connection failure, after it has been logged
                (and printed if verbose).
        """
        try:
            if self._ws is None:
                self._ws = sync_connect(self.url)
                self.logger.info("Connected to WebSocket.")
                if self.verbose:
                    print("Connected to WebSocket.")
        except Exception as e:
            self.logger.error("Failed to connect to WebSocket: %s", e, exc_info=True)
            if self.verbose:
                print(f"Failed to connect to WebSocket: {e}")
            self._ws = None
            raise

    def subscribe(self, symbols: Union[str, List[str]]):
        """
        Subscribe to a stock symbol or a list of stock symbols.

        Args:
            symbols (Union[str, List[str]]): Stock symbol(s) to subscribe to.
        """
        self._connect()

        if isinstance(symbols, str):
            symbols = [symbols]

        self._subscriptions.update(symbols)

        # The whole subscription set is sent, not only the new symbols.
        message = {"subscribe": list(self._subscriptions)}
        self._ws.send(json.dumps(message))

        self.logger.info(f"Subscribed to symbols: {symbols}")
        if self.verbose:
            print(f"Subscribed to symbols: {symbols}")

    def unsubscribe(self, symbols: Union[str, List[str]]):
        """
        Unsubscribe from a stock symbol or a list of stock symbols.

        Args:
            symbols (Union[str, List[str]]): Stock symbol(s) to unsubscribe from.
        """
        self._connect()

        if isinstance(symbols, str):
            symbols = [symbols]

        self._subscriptions.difference_update(symbols)

        message = {"unsubscribe": symbols}
        self._ws.send(json.dumps(message))

        self.logger.info(f"Unsubscribed from symbols: {symbols}")
        if self.verbose:
            print(f"Unsubscribed from symbols: {symbols}")

    def listen(self, message_handler: Optional[Callable[[dict], None]] = None):
        """
        Start listening to messages from the WebSocket server.

        Blocks until a keyboard interrupt (the connection is then closed) or,
        with exceptions hidden, until the first error while receiving.

        Args:
            message_handler (Optional[Callable[[dict], None]]): Optional function to handle received messages.
                Without a handler, messages are printed.
        """
        self._connect()

        self.logger.info("Listening for messages...")
        if self.verbose:
            print("Listening for messages...")

        while True:
            try:
                message = self._ws.recv()
                message_json = json.loads(message)
                encoded_data = message_json.get("message", "")
                decoded_message = self._decode_message(encoded_data)

                if message_handler:
                    try:
                        message_handler(decoded_message)
                    except Exception as handler_exception:
                        if not YfConfig.debug.hide_exceptions:
                            raise
                        self.logger.error("Error in message handler: %s", handler_exception, exc_info=True)
                        if self.verbose:
                            print("Error in message handler:", handler_exception)
                else:
                    print(decoded_message)

            except KeyboardInterrupt:
                if self.verbose:
                    print("Received keyboard interrupt.")
                self.close()
                break

            except Exception as e:
                if not YfConfig.debug.hide_exceptions:
                    raise
                self.logger.error("Error while listening to messages: %s", e, exc_info=True)
                if self.verbose:
                    # print() does no %-formatting, so the message is built explicitly.
                    print(f"Error while listening to messages: {e}")
                break

    def close(self):
        """Close the WebSocket connection, if any."""
        if self._ws is not None:
            # Forget the connection before closing it so that later calls
            # reconnect instead of sending on a closed socket.
            ws, self._ws = self._ws, None
            ws.close()
            self.logger.info("WebSocket connection closed.")
            if self.verbose:
                print("WebSocket connection closed.")

    def __enter__(self):
        """Connect and return the client for use in ``with``."""
        self._connect()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the connection when leaving the ``with`` block."""
        self.close()
@@ -0,0 +1,220 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json as _json
import pandas as pd
from . import utils
from .config import YfConfig
from .const import _QUERY1_URL_
from .data import YfData
from .exceptions import YFDataException
# Instrument categories; these are the values the Lookup accessors below pass
# as the "type" request parameter.
LOOKUP_TYPES = ["all", "equity", "mutualfund", "etf", "index", "future", "currency", "cryptocurrency"]
class Lookup:
    """
    Fetches quote (ticker) lookups from Yahoo Finance.

    Successful responses are cached per ``(lookup type, count)`` on the instance,
    so repeated accessor calls do not repeat the request.

    :param query: The search query for financial data lookup.
    :type query: str
    :param session: Custom HTTP session for requests (default None).
    :param timeout: Request timeout in seconds (default 30).
    :param raise_errors: Raise exceptions on error (default True).
        Stored on the instance; the methods of this class do not consult it.
    """

    def __init__(self, query: str, session=None, timeout=30, raise_errors=True):
        self.session = session
        self._data = YfData(session=self.session)

        self.query = query

        self.timeout = timeout
        self.raise_errors = raise_errors

        self._logger = utils.get_yf_logger()

        self._cache = {}

    def _fetch_lookup(self, lookup_type="all", count=25) -> dict:
        """
        Request the lookup endpoint and return the decoded JSON payload.

        :param lookup_type: Value sent as the ``type`` request parameter.
        :param count: Number of results requested.
        :return: The decoded payload, or ``{}`` when the body is not valid JSON
            and exceptions are hidden (that fallback is not cached).
        :raises YFDataException: If there is no response, the response is the
            "Will be right back" page, or the payload reports an error.
        """
        cache_key = (lookup_type, count)
        if cache_key in self._cache:
            return self._cache[cache_key]

        url = f"{_QUERY1_URL_}/v1/finance/lookup"
        params = {
            "query": self.query,
            "type": lookup_type,
            "start": 0,
            "count": count,
            "formatted": False,
            "fetchPricingData": True,
            "lang": "en-US",
            "region": "US"
        }

        self._logger.debug(f'GET Lookup for ticker ({self.query}) with parameters: {str(dict(params))}')

        data = self._data.get(url=url, params=params, timeout=self.timeout)
        if data is None or "Will be right back" in data.text:
            raise YFDataException("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***")
        try:
            data = data.json()
        except _json.JSONDecodeError:
            if not YfConfig.debug.hide_exceptions:
                raise
            # This class has no `ticker` attribute; the query identifies the request.
            self._logger.error(f"{self.query}: 'lookup' fetch received faulty data")
            # Do not cache the fallback, so that a later call can retry.
            return {}

        # Error returned
        error = (data.get("finance") or {}).get("error")
        if error:
            raise YFDataException(f"{self.query}: 'lookup' fetch returned error: {error}")

        self._cache[cache_key] = data
        return data

    @staticmethod
    def _parse_response(response: dict) -> pd.DataFrame:
        """
        Turn a lookup payload into a DataFrame indexed by ``symbol``.

        :param response: Payload as returned by ``_fetch_lookup``.
        :return: One row per entry of ``finance.result[0].documents``; an empty
            DataFrame when that list is missing, empty, or has no ``symbol``.
        """
        # `or` also covers keys that are present but null.
        finance = response.get("finance") or {}
        result = finance.get("result") or []
        first = result[0] if len(result) > 0 else {}
        documents = first.get("documents") or []
        df = pd.DataFrame(documents)
        if "symbol" not in df.columns:
            return pd.DataFrame()
        return df.set_index("symbol")

    def _get_data(self, lookup_type: str, count: int = 25) -> pd.DataFrame:
        """Fetch (or reuse the cached) lookup for ``lookup_type`` and parse it."""
        return self._parse_response(self._fetch_lookup(lookup_type, count))

    def get_all(self, count=25) -> pd.DataFrame:
        """
        Returns all available financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("all", count)

    def get_stock(self, count=25) -> pd.DataFrame:
        """
        Returns stock related financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("equity", count)

    def get_mutualfund(self, count=25) -> pd.DataFrame:
        """
        Returns mutual funds related financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("mutualfund", count)

    def get_etf(self, count=25) -> pd.DataFrame:
        """
        Returns ETFs related financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("etf", count)

    def get_index(self, count=25) -> pd.DataFrame:
        """
        Returns Indices related financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("index", count)

    def get_future(self, count=25) -> pd.DataFrame:
        """
        Returns Futures related financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("future", count)

    def get_currency(self, count=25) -> pd.DataFrame:
        """
        Returns Currencies related financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("currency", count)

    def get_cryptocurrency(self, count=25) -> pd.DataFrame:
        """
        Returns Cryptocurrencies related financial instruments.

        :param count: The number of results to retrieve.
        :type count: int
        """
        return self._get_data("cryptocurrency", count)

    @property
    def all(self) -> pd.DataFrame:
        """Returns all available financial instruments."""
        return self._get_data("all")

    @property
    def stock(self) -> pd.DataFrame:
        """Returns stock related financial instruments."""
        return self._get_data("equity")

    @property
    def mutualfund(self) -> pd.DataFrame:
        """Returns mutual funds related financial instruments."""
        return self._get_data("mutualfund")

    @property
    def etf(self) -> pd.DataFrame:
        """Returns ETFs related financial instruments."""
        return self._get_data("etf")

    @property
    def index(self) -> pd.DataFrame:
        """Returns Indices related financial instruments."""
        return self._get_data("index")

    @property
    def future(self) -> pd.DataFrame:
        """Returns Futures related financial instruments."""
        return self._get_data("future")

    @property
    def currency(self) -> pd.DataFrame:
        """Returns Currencies related financial instruments."""
        return self._get_data("currency")

    @property
    def cryptocurrency(self) -> pd.DataFrame:
        """Returns Cryptocurrencies related financial instruments."""
        return self._get_data("cryptocurrency")
@@ -0,0 +1,290 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
import logging
import time as _time
import traceback
from typing import Union
import multitasking as _multitasking
import pandas as _pd
from curl_cffi import requests
from . import Ticker, utils
from .data import YfData
from . import shared
from .config import YfConfig
@utils.log_indent_decorator
def download(tickers, start=None, end=None, actions=False, threads=True,
             ignore_tz=None, group_by='column', auto_adjust=True, back_adjust=False,
             repair=False, keepna=False, progress=True, period=None, interval="1d",
             prepost=False, rounding=False, timeout=10, session=None,
             multi_level_index=True) -> Union[_pd.DataFrame, None]:
    """
    Download yahoo tickers

    :Parameters:
        tickers : str, list
            List of tickers to download. A string may separate tickers with
            commas and/or whitespace.
        period : str
            Valid periods: 1d,5d,1mo,3mo,6mo,1y,2y,5y,10y,ytd,max
            Default: 1mo
            Either Use period parameter or use start and end
        interval : str
            Valid intervals: 1m,2m,5m,15m,30m,60m,90m,1h,1d,5d,1wk,1mo,3mo
            Intraday data cannot extend beyond the last 60 days
        start: str
            Download start date string (YYYY-MM-DD) or _datetime, inclusive.
            Default is 99 years ago
            E.g. for start="2020-01-01", the first data point will be on "2020-01-01"
        end: str
            Download end date string (YYYY-MM-DD) or _datetime, exclusive.
            Default is now
            E.g. for end="2023-01-01", the last data point will be on "2022-12-31"
        group_by : str
            Group by 'ticker' or 'column' (default)
        prepost : bool
            Include Pre and Post market data in results?
            Default is False
        auto_adjust: bool
            Adjust all OHLC automatically? Default is True
        back_adjust: bool
            Passed through to ``Ticker.history``. Default is False
        repair: bool
            Detect currency unit 100x mixups and attempt repair
            Default is False
        keepna: bool
            Keep NaN rows returned by Yahoo?
            Default is False
        actions: bool
            Download dividend + stock splits data. Default is False
        threads: bool / int
            How many threads to use for mass downloading. Default is True
            Forced off while DEBUG logging is enabled.
        progress: bool
            Show a progress bar. Default is True
            Forced off while DEBUG logging is enabled.
        ignore_tz: bool
            When combining from different timezones, ignore that part of datetime.
            Default depends on interval. Intraday = False. Day+ = True.
        rounding: bool
            Optional. Round values to 2 decimal places?
        timeout: None or float
            If not None stops waiting for a response after given number of
            seconds. (Can also be a fraction of a second e.g. 0.01)
        session: None or Session
            Optional. Pass your own session object to be used for all requests
        multi_level_index: bool
            Optional. Always return a MultiIndex DataFrame? Default is True

    :Returns:
        DataFrame with one column group per ticker. The column levels are
        ('Price', 'Ticker') for group_by='column' and ('Ticker', 'Price')
        otherwise; the ticker level is dropped when multi_level_index is False
        and a single ticker was requested.
    """
    logger = utils.get_yf_logger()
    session = session or requests.Session(impersonate="chrome")

    # Ensure data initialised with session.
    YfData(session=session)

    if logger.isEnabledFor(logging.DEBUG):
        if threads:
            # With DEBUG, each thread generates a lot of log messages.
            # And with multi-threading, these messages will be interleaved, bad!
            # So disable multi-threading to make log readable.
            logger.debug('Disabling multithreading because DEBUG logging enabled')
            threads = False
        if progress:
            # Disable progress bar, interferes with display of log messages
            progress = False

    if ignore_tz is None:
        # Set default value depending on interval: only intraday keeps timezones
        ignore_tz = interval[-1] not in ['m', 'h']

    # create ticker list
    if not isinstance(tickers, (list, set, tuple)):
        tickers = tickers.replace(',', ' ').split()

    # accept isin as ticker
    shared._ISINS = {}
    resolved = []
    for ticker in tickers:
        if utils.is_isin(ticker):
            isin = ticker
            ticker = utils.get_ticker_by_isin(ticker)
            shared._ISINS[ticker] = isin
        resolved.append(ticker)

    # upper-case and de-duplicate
    tickers = list({ticker.upper() for ticker in resolved})

    if progress:
        shared._PROGRESS_BAR = utils.ProgressBar(len(tickers), 'completed')

    # reset the module-level result stores filled by _download_one
    shared._DFS = {}
    shared._ERRORS = {}
    shared._TRACEBACKS = {}

    # download using threads
    if threads:
        if threads is True:
            threads = min([len(tickers), _multitasking.cpu_count() * 2])
        _multitasking.set_max_threads(threads)
        for i, ticker in enumerate(tickers):
            _download_one_threaded(ticker, period=period, interval=interval,
                                   start=start, end=end, prepost=prepost,
                                   actions=actions, auto_adjust=auto_adjust,
                                   back_adjust=back_adjust, repair=repair, keepna=keepna,
                                   progress=(progress and i > 0),
                                   rounding=rounding, timeout=timeout)
        # every task stores an entry (data or an empty frame) under its ticker
        while len(shared._DFS) < len(tickers):
            _time.sleep(0.01)
    # download synchronously
    else:
        for ticker in tickers:
            # the result is collected through shared._DFS, not the return value
            _download_one(ticker, period=period, interval=interval,
                          start=start, end=end, prepost=prepost,
                          actions=actions, auto_adjust=auto_adjust,
                          back_adjust=back_adjust, repair=repair, keepna=keepna,
                          rounding=rounding, timeout=timeout)
            if progress:
                shared._PROGRESS_BAR.animate()

    if progress:
        shared._PROGRESS_BAR.completed()

    if shared._ERRORS:
        # Send errors to logging module
        logger.error('\n%.f Failed download%s:' % (
            len(shared._ERRORS), 's' if len(shared._ERRORS) > 1 else ''))

        # Log each distinct error once, with list of symbols affected
        errors = {}
        for ticker, err in shared._ERRORS.items():
            err = err.replace(f'${ticker}: ', '')
            errors.setdefault(err, []).append(ticker)
        for err, affected in errors.items():
            logger.error(f'{affected}: ' + err)

        # Log each distinct traceback once, with list of symbols affected
        tbs = {}
        for ticker, tb in shared._TRACEBACKS.items():
            tb = tb.replace(f'${ticker}: ', '')
            tbs.setdefault(tb, []).append(ticker)
        for tb, affected in tbs.items():
            logger.debug(f'{affected}: ' + tb)

    if ignore_tz:
        for tkr in shared._DFS.keys():
            if (shared._DFS[tkr] is not None) and (shared._DFS[tkr].shape[0] > 0):
                shared._DFS[tkr].index = shared._DFS[tkr].index.tz_localize(None)

    try:
        data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
                          keys=shared._DFS.keys(), names=['Ticker', 'Price'])
    except Exception:
        # retry once after aligning the frames on a common index
        _realign_dfs()
        data = _pd.concat(shared._DFS.values(), axis=1, sort=True,
                          keys=shared._DFS.keys(), names=['Ticker', 'Price'])
    data.index = _pd.to_datetime(data.index, utc=not ignore_tz)
    # switch names back to isins if applicable
    data.rename(columns=shared._ISINS, inplace=True)

    if group_by == 'column':
        data.columns = data.columns.swaplevel(0, 1)
        data.sort_index(level=0, axis=1, inplace=True)

    if not multi_level_index and len(tickers) == 1:
        data = data.droplevel(0 if group_by == 'ticker' else 1, axis=1).rename_axis(None, axis=1)

    return data
def _realign_dfs():
    """
    Rebuild every frame in ``shared._DFS`` on the index of the longest frame.

    Each frame is replaced in place in ``shared._DFS``; afterwards rows with a
    repeated index label are reduced to the last occurrence.
    """
    longest = 0
    target_index = None
    for frame in shared._DFS.values():
        n_rows = len(frame)
        if n_rows > longest:
            longest, target_index = n_rows, frame.index

    for name in shared._DFS.keys():
        try:
            rebuilt = _pd.DataFrame(
                index=target_index, data=shared._DFS[name]).drop_duplicates()
        except Exception:
            # fall back to stacking the non-NaN rows under an empty frame
            parts = [utils.empty_df(target_index), shared._DFS[name].dropna()]
            rebuilt = _pd.concat(parts, axis=0, sort=True)
        shared._DFS[name] = rebuilt

        # remove duplicate index
        is_repeat = rebuilt.index.duplicated(keep='last')
        shared._DFS[name] = rebuilt.loc[~is_repeat]
@_multitasking.task
def _download_one_threaded(ticker, start=None, end=None,
                           auto_adjust=False, back_adjust=False, repair=False,
                           actions=False, progress=True, period=None,
                           interval="1d", prepost=False,
                           keepna=False, rounding=False, timeout=10):
    """
    Task wrapper around ``_download_one`` that also advances the progress bar.

    All download options are forwarded unchanged; ``progress`` only controls
    whether ``shared._PROGRESS_BAR`` is animated once the download returned.
    """
    _download_one(
        ticker,
        start=start, end=end,
        auto_adjust=auto_adjust, back_adjust=back_adjust, repair=repair,
        actions=actions,
        period=period, interval=interval,
        prepost=prepost, rounding=rounding,
        keepna=keepna, timeout=timeout,
    )
    if not progress:
        return
    shared._PROGRESS_BAR.animate()
def _download_one(ticker, start=None, end=None,
                  auto_adjust=False, back_adjust=False, repair=False,
                  actions=False, period=None, interval="1d",
                  prepost=False, rounding=False,
                  keepna=False, timeout=10):
    """
    Download the history of one ticker and record the outcome in ``shared``.

    On success the frame is stored in ``shared._DFS`` under the upper-cased
    ticker. On failure an empty frame is stored there instead, and the error's
    ``repr`` and traceback are stored in ``shared._ERRORS`` /
    ``shared._TRACEBACKS`` under the same key.

    :return: The downloaded frame, or None if the download raised.
    """
    data = None

    # NOTE(review): the other modules in this package consult
    # YfConfig.debug.hide_exceptions; confirm that the `network` section is
    # really the one meant here.
    backup = YfConfig.network.hide_exceptions
    YfConfig.network.hide_exceptions = False
    try:
        data = Ticker(ticker).history(
            period=period, interval=interval,
            start=start, end=end, prepost=prepost,
            actions=actions, auto_adjust=auto_adjust,
            back_adjust=back_adjust, repair=repair,
            rounding=rounding, keepna=keepna, timeout=timeout
        )
        shared._DFS[ticker.upper()] = data
    except Exception as e:
        shared._DFS[ticker.upper()] = utils.empty_df()
        shared._ERRORS[ticker.upper()] = repr(e)
        shared._TRACEBACKS[ticker.upper()] = traceback.format_exc()
    finally:
        # Restore the flag even for KeyboardInterrupt/SystemExit, which the
        # handler above does not catch.
        YfConfig.network.hide_exceptions = backup

    return data
@@ -0,0 +1,37 @@
syntax = "proto3";
// Message parsed by the streaming client (BaseWebSocket._decode_message).
// The generated pricing_pb2 module embeds a serialized copy of this schema,
// so regenerate that module after any change to the fields below.
message PricingData {
string id = 1;
float price = 2;
sint64 time = 3;
string currency = 4;
string exchange = 5;
int32 quote_type = 6;
int32 market_hours = 7;
float change_percent = 8;
sint64 day_volume = 9;
float day_high = 10;
float day_low = 11;
float change = 12;
string short_name = 13;
sint64 expire_date = 14;
float open_price = 15;
float previous_close = 16;
float strike_price = 17;
string underlying_symbol = 18;
sint64 open_interest = 19;
sint64 options_type = 20;
sint64 mini_option = 21;
sint64 last_size = 22;
float bid = 23;
sint64 bid_size = 24;
float ask = 25;
sint64 ask_size = 26;
sint64 price_hint = 27;
sint64 vol_24hr = 28;
sint64 vol_all_currencies = 29;
string from_currency = 30;
string last_market = 31;
double circulating_supply = 32;
double market_cap = 33;
}
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
# Generated by the protocol buffer compiler. DO NOT EDIT!
# source: pricing.proto
"""Generated protocol buffer code."""
from google.protobuf import descriptor as _descriptor
from google.protobuf import descriptor_pool as _descriptor_pool
from google.protobuf import message as _message
from google.protobuf import reflection as _reflection
from google.protobuf import symbol_database as _symbol_database
# @@protoc_insertion_point(imports)
_sym_db = _symbol_database.Default()
DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\rpricing.proto\"\x9a\x05\n\x0bPricingData\x12\n\n\x02id\x18\x01 \x01(\t\x12\r\n\x05price\x18\x02 \x01(\x02\x12\x0c\n\x04time\x18\x03 \x01(\x12\x12\x10\n\x08\x63urrency\x18\x04 \x01(\t\x12\x10\n\x08\x65xchange\x18\x05 \x01(\t\x12\x12\n\nquote_type\x18\x06 \x01(\x05\x12\x14\n\x0cmarket_hours\x18\x07 \x01(\x05\x12\x16\n\x0e\x63hange_percent\x18\x08 \x01(\x02\x12\x12\n\nday_volume\x18\t \x01(\x12\x12\x10\n\x08\x64\x61y_high\x18\n \x01(\x02\x12\x0f\n\x07\x64\x61y_low\x18\x0b \x01(\x02\x12\x0e\n\x06\x63hange\x18\x0c \x01(\x02\x12\x12\n\nshort_name\x18\r \x01(\t\x12\x13\n\x0b\x65xpire_date\x18\x0e \x01(\x12\x12\x12\n\nopen_price\x18\x0f \x01(\x02\x12\x16\n\x0eprevious_close\x18\x10 \x01(\x02\x12\x14\n\x0cstrike_price\x18\x11 \x01(\x02\x12\x19\n\x11underlying_symbol\x18\x12 \x01(\t\x12\x15\n\ropen_interest\x18\x13 \x01(\x12\x12\x14\n\x0coptions_type\x18\x14 \x01(\x12\x12\x13\n\x0bmini_option\x18\x15 \x01(\x12\x12\x11\n\tlast_size\x18\x16 \x01(\x12\x12\x0b\n\x03\x62id\x18\x17 \x01(\x02\x12\x10\n\x08\x62id_size\x18\x18 \x01(\x12\x12\x0b\n\x03\x61sk\x18\x19 \x01(\x02\x12\x10\n\x08\x61sk_size\x18\x1a \x01(\x12\x12\x12\n\nprice_hint\x18\x1b \x01(\x12\x12\x10\n\x08vol_24hr\x18\x1c \x01(\x12\x12\x1a\n\x12vol_all_currencies\x18\x1d \x01(\x12\x12\x15\n\rfrom_currency\x18\x1e \x01(\t\x12\x13\n\x0blast_market\x18\x1f \x01(\t\x12\x1a\n\x12\x63irculating_supply\x18 \x01(\x01\x12\x12\n\nmarket_cap\x18! \x01(\x01\x62\x06proto3')
# Descriptor of the PricingData message, looked up in the file descriptor built above.
_PRICINGDATA = DESCRIPTOR.message_types_by_name['PricingData']
# Create the PricingData message class from that descriptor.
PricingData = _reflection.GeneratedProtocolMessageType('PricingData', (_message.Message,), {
'DESCRIPTOR' : _PRICINGDATA,
'__module__' : 'pricing_pb2'
# @@protoc_insertion_point(class_scope:PricingData)
})
# Register the class with the default symbol database.
_sym_db.RegisterMessage(PricingData)
# Only when _USE_C_DESCRIPTORS is false: clear the file options and record the
# span 18..684 on the message descriptor (presumably byte offsets into the
# serialized file above -- confirm against the protobuf runtime).
if not _descriptor._USE_C_DESCRIPTORS:
DESCRIPTOR._options = None
_PRICINGDATA._serialized_start=18
_PRICINGDATA._serialized_end=684
# @@protoc_insertion_point(module_scope)
@@ -0,0 +1,198 @@
import curl_cffi
import pandas as pd
from yfinance import utils
from yfinance.config import YfConfig
from yfinance.const import quote_summary_valid_modules
from yfinance.data import YfData
from yfinance.exceptions import YFException
from yfinance.scrapers.quote import _QUOTE_SUMMARY_URL_
class Analysis:
    """
    Analyst-related tables for one symbol, read from quoteSummary modules.

    Every public property fetches its data on first access and caches the
    result on the instance, including empty results.
    """

    def __init__(self, data: YfData, symbol: str):
        """
        :param data: Data accessor used for the HTTP requests.
        :param symbol: Symbol the quoteSummary requests are made for.
        """
        self._data = data
        self._symbol = symbol

        # In quoteSummary the 'earningsTrend' module contains most of the data below.
        # The format of data is not optimal so each function will process its part of the data.
        # This variable works as a cache.
        self._earnings_trend = None

        self._analyst_price_targets = None
        self._earnings_estimate = None
        self._revenue_estimate = None
        self._earnings_history = None
        self._eps_trend = None
        self._eps_revisions = None
        self._growth_estimates = None

    def _get_periodic_df(self, key) -> pd.DataFrame:
        """
        Build a per-period table from one section of the earnings trend.

        :param key: Section name inside each trend item, e.g. 'earningsEstimate'.
        :return: DataFrame indexed by 'period' holding the 'raw' value of every
            non-empty dict entry of the section, for the first four trend items;
            an empty DataFrame when there are no trend items.
        """
        if self._earnings_trend is None:
            self._fetch_earnings_trend()

        data = []
        for item in self._earnings_trend[:4]:
            row = {'period': item['period']}
            # only non-empty dict entries carry a 'raw' value
            row.update({
                k: v['raw']
                for k, v in item[key].items()
                if isinstance(v, dict) and len(v) > 0
            })
            data.append(row)
        if len(data) == 0:
            return pd.DataFrame()
        return pd.DataFrame(data).set_index('period')

    @property
    def earnings_estimate(self) -> pd.DataFrame:
        """Per-period table of the 'earningsEstimate' section (cached)."""
        if self._earnings_estimate is not None:
            return self._earnings_estimate
        self._earnings_estimate = self._get_periodic_df('earningsEstimate')
        return self._earnings_estimate

    @property
    def revenue_estimate(self) -> pd.DataFrame:
        """Per-period table of the 'revenueEstimate' section (cached)."""
        if self._revenue_estimate is not None:
            return self._revenue_estimate
        self._revenue_estimate = self._get_periodic_df('revenueEstimate')
        return self._revenue_estimate

    @property
    def eps_trend(self) -> pd.DataFrame:
        """Per-period table of the 'epsTrend' section (cached)."""
        if self._eps_trend is not None:
            return self._eps_trend
        self._eps_trend = self._get_periodic_df('epsTrend')
        return self._eps_trend

    @property
    def eps_revisions(self) -> pd.DataFrame:
        """Per-period table of the 'epsRevisions' section (cached)."""
        if self._eps_revisions is not None:
            return self._eps_revisions
        self._eps_revisions = self._get_periodic_df('epsRevisions')
        return self._eps_revisions

    @property
    def analyst_price_targets(self) -> dict:
        """
        Price targets taken from the 'financialData' module (cached).

        Keys starting with 'target' are renamed by dropping 'target' and
        'price' and lower-casing the rest; 'currentPrice' becomes 'current'.
        Returns an empty dict when the data is unavailable and exceptions are
        hidden.
        """
        if self._analyst_price_targets is not None:
            return self._analyst_price_targets

        try:
            data = self._fetch(['financialData'])
            data = data['quoteSummary']['result'][0]['financialData']
        except (TypeError, KeyError):
            if not YfConfig.debug.hide_exceptions:
                raise
            self._analyst_price_targets = {}
            return self._analyst_price_targets

        result = {}
        for key, value in data.items():
            if key.startswith('target'):
                new_key = key.replace('target', '').lower().replace('price', '').strip()
                result[new_key] = value
            elif key == 'currentPrice':
                result['current'] = value

        self._analyst_price_targets = result
        return self._analyst_price_targets

    @property
    def earnings_history(self) -> pd.DataFrame:
        """
        Table built from the 'earningsHistory' module (cached).

        One row per history item holding the 'raw' value of every non-empty
        dict entry; indexed by the item's 'quarter' parsed as a date. Returns
        an empty DataFrame when there is no history, or when the data is
        unavailable and exceptions are hidden.
        """
        if self._earnings_history is not None:
            return self._earnings_history

        try:
            data = self._fetch(['earningsHistory'])
            data = data['quoteSummary']['result'][0]['earningsHistory']['history']
        except (TypeError, KeyError):
            if not YfConfig.debug.hide_exceptions:
                raise
            self._earnings_history = pd.DataFrame()
            return self._earnings_history

        rows = []
        for item in data:
            row = {'quarter': item.get('quarter', {}).get('fmt', None)}
            for k, v in item.items():
                if k == 'quarter':
                    continue
                if not isinstance(v, dict) or len(v) == 0:
                    continue
                row[k] = v.get('raw', None)
            rows.append(row)
        if len(rows) == 0:
            # cache the empty result too, so it is not fetched again on every access
            self._earnings_history = pd.DataFrame()
            return self._earnings_history

        df = pd.DataFrame(rows)
        if 'quarter' in df.columns:
            df['quarter'] = pd.to_datetime(df['quarter'], format='%Y-%m-%d')
            df.set_index('quarter', inplace=True)

        self._earnings_history = df
        return self._earnings_history

    @property
    def growth_estimates(self) -> pd.DataFrame:
        """
        Growth per period for the stock and for the trend modules (cached).

        The 'stockTrend' column comes from the earnings trend; one further
        column per requested trend module ('industryTrend', 'sectorTrend',
        'indexTrend') is filled from that module's estimates. Rows that are
        entirely NaN are dropped. Returns an empty DataFrame when nothing is
        available, or when the fetch fails and exceptions are hidden.
        """
        if self._growth_estimates is not None:
            return self._growth_estimates

        if self._earnings_trend is None:
            self._fetch_earnings_trend()

        try:
            trends = self._fetch(['industryTrend', 'sectorTrend', 'indexTrend'])
            trends = trends['quoteSummary']['result'][0]
        except (TypeError, KeyError):
            if not YfConfig.debug.hide_exceptions:
                raise
            self._growth_estimates = pd.DataFrame()
            return self._growth_estimates

        data = []
        # first row seen for each period, so estimates can be merged without rescanning `data`
        row_by_period = {}
        for item in self._earnings_trend:
            period = item['period']
            row = {'period': period, 'stockTrend': item.get('growth', {}).get('raw', None)}
            data.append(row)
            row_by_period.setdefault(period, row)

        for trend_name, trend_info in trends.items():
            if not isinstance(trend_info, dict):
                # a missing/null module has no estimates to merge
                continue
            for estimate in trend_info.get('estimates') or []:
                period = estimate['period']
                row = row_by_period.get(period)
                if row is None:
                    row = {'period': period}
                    data.append(row)
                    row_by_period[period] = row
                row[trend_name] = estimate.get('growth')

        if len(data) == 0:
            # cache the empty result too, so it is not fetched again on every access
            self._growth_estimates = pd.DataFrame()
            return self._growth_estimates

        self._growth_estimates = pd.DataFrame(data).set_index('period').dropna(how='all')
        return self._growth_estimates

    # modified version from quote.py
    def _fetch(self, modules: list):
        """
        Request the given quoteSummary modules for this symbol.

        :param modules: Module names; names not in
            ``quote_summary_valid_modules`` are dropped.
        :return: The decoded JSON result, or None after an HTTP error when
            exceptions are hidden.
        :raises YFException: If ``modules`` is not a list or contains no valid module.
        """
        if not isinstance(modules, list):
            raise YFException("Should provide a list of modules, see available modules using `valid_modules`")

        modules = ','.join([m for m in modules if m in quote_summary_valid_modules])
        if len(modules) == 0:
            raise YFException("No valid modules provided, see available modules using `valid_modules`")
        params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "formatted": "false", "symbol": self._symbol}
        try:
            result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_ + f"/{self._symbol}", params=params_dict)
        except curl_cffi.requests.exceptions.HTTPError as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            utils.get_yf_logger().error(str(e) + e.response.text)
            return None
        return result

    def _fetch_earnings_trend(self) -> None:
        """Load the 'earningsTrend' trend list into the cache ([] on failure when exceptions are hidden)."""
        try:
            data = self._fetch(['earningsTrend'])
            self._earnings_trend = data['quoteSummary']['result'][0]['earningsTrend']['trend']
        except (TypeError, KeyError):
            if not YfConfig.debug.hide_exceptions:
                raise
            self._earnings_trend = []
@@ -0,0 +1,163 @@
import datetime
import json
import warnings
import pandas as pd
from yfinance import utils, const
from yfinance.config import YfConfig
from yfinance.data import YfData
from yfinance.exceptions import YFException, YFNotImplementedError
class Fundamentals:
    """Entry point to the fundamentals of one symbol; wraps a ``Financials`` object."""

    def __init__(self, data: YfData, symbol: str):
        """
        :param data: Data accessor, also handed to the ``Financials`` helper.
        :param symbol: Symbol the fundamentals belong to.
        """
        self._data = data
        self._symbol = symbol

        self._earnings = None
        # statement time series are served by a dedicated helper
        self._financials = Financials(data, symbol)
        self._shares = None

        self._financials_data = None
        self._fin_data_quote = None
        self._basics_already_scraped = False

    @property
    def financials(self) -> "Financials":
        """The ``Financials`` helper created for this symbol."""
        return self._financials

    @property
    def earnings(self) -> dict:
        """Deprecated: emits a DeprecationWarning and returns None."""
        notice = "'Ticker.earnings' is deprecated as not available via API. Look for \"Net Income\" in Ticker.income_stmt."
        warnings.warn(notice, DeprecationWarning)
        return None

    @property
    def shares(self) -> pd.DataFrame:
        """The stored shares table; raises YFNotImplementedError while none is set."""
        if self._shares is not None:
            return self._shares
        raise YFNotImplementedError('shares')
class Financials:
    """Fetch and cache fundamentals time-series statements for one symbol.

    Each statement type keeps its own ``{freq: DataFrame}`` cache; a result
    (including the empty fallback frame) is stored the first time a
    frequency is requested and reused afterwards.
    """

    def __init__(self, data: YfData, symbol: str):
        """
        Args:
            data: The YfData object used for HTTP requests.
            symbol: The ticker symbol.
        """
        self._data = data
        self._symbol = symbol
        self._income_time_series = {}
        self._balance_sheet_time_series = {}
        self._cash_flow_time_series = {}

    def get_income_time_series(self, freq="yearly") -> pd.DataFrame:
        """Return the income statement for ``freq``, fetching it on first use."""
        res = self._income_time_series
        if freq not in res:
            res[freq] = self._fetch_time_series("income", freq)
        return res[freq]

    def get_balance_sheet_time_series(self, freq="yearly") -> pd.DataFrame:
        """Return the balance sheet for ``freq``, fetching it on first use."""
        res = self._balance_sheet_time_series
        if freq not in res:
            res[freq] = self._fetch_time_series("balance-sheet", freq)
        return res[freq]

    def get_cash_flow_time_series(self, freq="yearly") -> pd.DataFrame:
        """Return the cash-flow statement for ``freq``, fetching it on first use."""
        res = self._cash_flow_time_series
        if freq not in res:
            res[freq] = self._fetch_time_series("cash-flow", freq)
        return res[freq]

    @utils.log_indent_decorator
    def _fetch_time_series(self, name, timescale):
        """Validate the arguments and build the requested statement table.

        Args:
            name: One of "income", "balance-sheet", "cash-flow".
            timescale: One of "yearly", "quarterly", "trailing"
                ("trailing" is rejected for "balance-sheet").

        Returns:
            The statement table, or an empty DataFrame when none could be
            built and exceptions are hidden.

        Raises:
            ValueError: On an illegal ``name``/``timescale`` combination.
        """
        # Fetching time series preferred over scraping 'QuoteSummaryStore',
        # because it matches what Yahoo shows. But for some tickers returns nothing,
        # despite 'QuoteSummaryStore' containing valid data.
        allowed_names = ["income", "balance-sheet", "cash-flow"]
        allowed_timescales = ["yearly", "quarterly", "trailing"]

        if name not in allowed_names:
            raise ValueError(f"Illegal argument: name must be one of: {allowed_names}")
        if timescale not in allowed_timescales:
            raise ValueError(f"Illegal argument: timescale must be one of: {allowed_timescales}")
        if timescale == "trailing" and name not in ('income', 'cash-flow'):
            raise ValueError("Illegal argument: frequency 'trailing'" +
                             " only available for cash-flow or income data.")

        try:
            statement = self._create_financials_table(name, timescale)
            if statement is not None:
                return statement
        except YFException as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            utils.get_yf_logger().error(f"{self._symbol}: Failed to create {name} financials table for reason: {e}")
        return pd.DataFrame()

    def _create_financials_table(self, name, timescale):
        """Build the table for ``name``; return ``None`` if that fails quietly.

        Any exception is re-raised unless ``YfConfig.debug.hide_exceptions``
        is set, in which case ``None`` is returned.
        """
        if name == "income":
            # Yahoo stores the 'income' table internally under 'financials' key
            name = "financials"

        keys = const.fundamentals_keys[name]

        try:
            return self._get_financials_time_series(timescale, keys)
        except Exception:
            if not YfConfig.debug.hide_exceptions:
                raise
            # Deliberate best-effort: the caller maps None to an empty frame.
            return None

    def _get_financials_time_series(self, timescale, keys: list) -> pd.DataFrame:
        """Download the time series for ``keys`` and reshape them into a table.

        Args:
            timescale: "yearly", "quarterly" or "trailing".
            keys: Row names to request, in the order the rows should appear.

        Returns:
            DataFrame with one row per returned key (timescale prefix
            removed), one float column per report date, newest first. For
            "trailing" only the first (newest) column is kept.
        """
        timescale_translation = {"yearly": "annual", "quarterly": "quarterly", "trailing": "trailing"}
        timescale = timescale_translation[timescale]

        # Construct the url; each requested type is "<timescale><key>".
        ts_url_base = f"https://query2.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self._symbol}?symbol={self._symbol}"
        url = ts_url_base + "&type=" + ",".join([timescale + k for k in keys])
        # Yahoo returns maximum 4 years or 5 quarters, regardless of start_dt:
        start_dt = datetime.datetime(2016, 12, 31)
        end = pd.Timestamp.now('UTC').ceil("D")
        url += f"&period1={int(start_dt.timestamp())}&period2={int(end.timestamp())}"

        # Fetch the raw data.
        json_str = self._data.cache_get(url=url).text
        json_data = json.loads(json_str)
        data_raw = json_data["timeseries"]["result"]

        # Collect the union of timestamps (columns) and the per-key series
        # (rows). The "meta" entry is neither, so it is skipped.
        timestamps = set()
        data_unpacked = {}
        for entry in data_raw:
            for k, v in entry.items():
                if k == "meta":
                    continue
                if k == "timestamp":
                    timestamps.update(v)
                else:
                    data_unpacked[k] = v
        dates = pd.to_datetime(sorted(timestamps), unit="s")

        # Fill the table row by row; dates a series lacks stay NaN.
        df = pd.DataFrame(columns=dates, index=list(data_unpacked.keys()))
        for k, v in data_unpacked.items():
            df.loc[k] = {pd.Timestamp(x["asOfDate"]): x["reportedValue"]["raw"] for x in v}

        df.index = df.index.str.replace("^" + timescale, "", regex=True)

        # Ensure float type, not object
        for d in df.columns:
            df[d] = df[d].astype('float')

        # Reorder table to match order on Yahoo website
        df = df.reindex([k for k in keys if k in df.index])
        df = df[sorted(df.columns, reverse=True)]

        # Trailing 12 months return only the first column.
        if timescale == "trailing":
            df = df.iloc[:, [0]]

        return df
@@ -0,0 +1,336 @@
import pandas as pd
from typing import Dict, Optional
from yfinance import utils
from yfinance.config import YfConfig
from yfinance.const import _BASE_URL_
from yfinance.data import YfData
from yfinance.exceptions import YFDataException
# quoteSummary endpoint. Note the trailing slash: FundsData._fetch appends the
# symbol directly to this string.
_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary/"
class FundsData:
    """
    ETF and Mutual Funds Data
    Queried Modules: quoteType, summaryProfile, fundProfile, topHoldings

    All accessors are lazy: the first access to a value that is still
    ``None`` triggers one request that populates every value.

    Notes:
    - fundPerformance module is not implemented as better data is queryable using history
    """

    # JSON keys of the asset-class positions inside "topHoldings".
    _ASSET_CLASS_KEYS = (
        "cashPosition",
        "stockPosition",
        "bondPosition",
        "preferredPosition",
        "convertiblePosition",
        "otherPosition",
    )
    # (row label, JSON key) pairs; the category value uses the key + "Cat".
    _EQUITY_ROWS = (
        ("Price/Earnings", "priceToEarnings"),
        ("Price/Book", "priceToBook"),
        ("Price/Sales", "priceToSales"),
        ("Price/Cashflow", "priceToCashflow"),
        ("Median Market Cap", "medianMarketCap"),
        ("3 Year Earnings Growth", "threeYearEarningsGrowth"),
    )
    _BOND_ROWS = (
        ("Duration", "duration"),
        ("Maturity", "maturity"),
        ("Credit Quality", "creditQuality"),
    )
    # (row label, JSON key) pairs; the category value uses the same key in
    # the separate "feesExpensesInvestmentCat" dict.
    _OPERATION_ROWS = (
        ("Annual Report Expense Ratio", "annualReportExpenseRatio"),
        ("Annual Holdings Turnover", "annualHoldingsTurnover"),
        ("Total Net Assets", "totalNetAssets"),
    )

    def __init__(self, data: YfData, symbol: str):
        """
        Args:
            data (YfData): The YfData object for fetching data.
            symbol (str): The symbol of the fund.
        """
        self._data = data
        self._symbol = symbol

        # quoteType
        self._quote_type = None

        # summaryProfile
        self._description = None

        # fundProfile
        self._fund_overview = None
        self._fund_operations = None

        # topHoldings
        self._asset_classes = None
        self._top_holdings = None
        self._equity_holdings = None
        self._bond_holdings = None
        self._bond_ratings = None
        self._sector_weightings = None

    def quote_type(self) -> str:
        """
        Returns the quote type of the fund.

        Unlike the other accessors this is a plain method, not a property.

        Returns:
            str: The quote type.
        """
        if self._quote_type is None:
            self._fetch_and_parse()
        return self._quote_type

    @property
    def description(self) -> str:
        """
        Returns the description of the fund.

        Returns:
            str: The description.
        """
        if self._description is None:
            self._fetch_and_parse()
        return self._description

    @property
    def fund_overview(self) -> Dict[str, Optional[str]]:
        """
        Returns the fund overview.

        Returns:
            Dict[str, Optional[str]]: The fund overview.
        """
        if self._fund_overview is None:
            self._fetch_and_parse()
        return self._fund_overview

    @property
    def fund_operations(self) -> pd.DataFrame:
        """
        Returns the fund operations.

        Returns:
            pd.DataFrame: The fund operations.
        """
        if self._fund_operations is None:
            self._fetch_and_parse()
        return self._fund_operations

    @property
    def asset_classes(self) -> Dict[str, float]:
        """
        Returns the asset classes of the fund.

        Returns:
            Dict[str, float]: The asset classes.
        """
        if self._asset_classes is None:
            self._fetch_and_parse()
        return self._asset_classes

    @property
    def top_holdings(self) -> pd.DataFrame:
        """
        Returns the top holdings of the fund.

        Returns:
            pd.DataFrame: The top holdings.
        """
        if self._top_holdings is None:
            self._fetch_and_parse()
        return self._top_holdings

    @property
    def equity_holdings(self) -> pd.DataFrame:
        """
        Returns the equity holdings of the fund.

        Returns:
            pd.DataFrame: The equity holdings.
        """
        if self._equity_holdings is None:
            self._fetch_and_parse()
        return self._equity_holdings

    @property
    def bond_holdings(self) -> pd.DataFrame:
        """
        Returns the bond holdings of the fund.

        Returns:
            pd.DataFrame: The bond holdings.
        """
        if self._bond_holdings is None:
            self._fetch_and_parse()
        return self._bond_holdings

    @property
    def bond_ratings(self) -> Dict[str, float]:
        """
        Returns the bond ratings of the fund.

        Returns:
            Dict[str, float]: The bond ratings.
        """
        if self._bond_ratings is None:
            self._fetch_and_parse()
        return self._bond_ratings

    @property
    def sector_weightings(self) -> Dict[str, float]:
        """
        Returns the sector weightings of the fund.

        Returns:
            Dict[str, float]: The sector weightings.
        """
        if self._sector_weightings is None:
            self._fetch_and_parse()
        return self._sector_weightings

    def _fetch(self):
        """
        Fetches the raw JSON data from the API.

        Returns:
            dict: The raw JSON data.
        """
        modules = ','.join(["quoteType", "summaryProfile", "topHoldings", "fundProfile"])
        params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "symbol": self._symbol, "formatted": "false"}
        result = self._data.get_raw_json(_QUOTE_SUMMARY_URL_+self._symbol, params=params_dict)
        return result

    def _fetch_and_parse(self) -> None:
        """
        Fetches and parses the data from the API.

        Raises:
            YFDataException: If an expected key is missing and
                ``YfConfig.debug.hide_exceptions`` is set (otherwise the
                original ``KeyError`` propagates).
        """
        result = self._fetch()
        # Bound up front so the generic handler below can always log it,
        # even when extracting it from `result` is what failed.
        data = None
        try:
            data = result["quoteSummary"]["result"][0]
            # check quote type
            self._quote_type = data["quoteType"]["quoteType"]

            # parse "summaryProfile", "topHoldings", "fundProfile"
            self._parse_description(data["summaryProfile"])
            self._parse_top_holdings(data["topHoldings"])
            self._parse_fund_profile(data["fundProfile"])
        except KeyError:
            if not YfConfig.debug.hide_exceptions:
                raise
            raise YFDataException(f"{self._symbol}: No Fund data found.")
        except Exception as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            logger = utils.get_yf_logger()
            logger.error(f"Failed to get fund data for '{self._symbol}' reason: {e}")
            logger.debug("Got response: ")
            logger.debug("-------------")
            # Fall back to the whole response when no result entry was extracted.
            logger.debug(f" {data if data is not None else result}")
            logger.debug("-------------")

    @staticmethod
    def _parse_raw_values(data, default=None):
        """
        Parses raw values from the data.

        Args:
            data: The data to parse.
            default: The value returned when data is a dictionary without
                a "raw" key.

        Returns:
            ``data`` itself when it is not a dictionary; otherwise its
            "raw" entry, or ``default`` when that entry is missing.
        """
        if not isinstance(data, dict):
            return data
        return data.get("raw", default)

    def _comparison_frame(self, index_name, rows, fund_values, category_values, category_suffix=""):
        """Build a fund-vs-category table indexed by row label.

        Args:
            index_name: Name of the label column, which becomes the index.
            rows: Iterable of (row label, JSON key) pairs.
            fund_values: Dict holding the fund's values.
            category_values: Dict holding the category values.
            category_suffix: Appended to each key for the category lookup.

        Returns:
            pd.DataFrame: Columns ``<symbol>`` and "Category Average";
            missing values are ``pd.NA``.
        """
        return pd.DataFrame({
            index_name: [label for label, _ in rows],
            self._symbol: [
                self._parse_raw_values(fund_values.get(key, pd.NA)) for _, key in rows
            ],
            "Category Average": [
                self._parse_raw_values(category_values.get(key + category_suffix, pd.NA))
                for _, key in rows
            ],
        }).set_index(index_name)

    def _parse_description(self, data) -> None:
        """
        Parses the description from the data.

        Args:
            data: The data to parse.
        """
        self._description = data.get("longBusinessSummary", "")

    def _parse_top_holdings(self, data) -> None:
        """
        Parses the top holdings from the data.

        Args:
            data: The data to parse.
        """
        # asset classes
        self._asset_classes = {
            key: self._parse_raw_values(data.get(key, None))
            for key in self._ASSET_CLASS_KEYS
        }

        # top holdings
        _holdings = data.get("holdings", [])
        self._top_holdings = pd.DataFrame({
            "Symbol": [item["symbol"] for item in _holdings],
            "Name": [item["holdingName"] for item in _holdings],
            "Holding Percent": [item["holdingPercent"] for item in _holdings],
        }).set_index("Symbol")

        # equity holdings
        _equity_holdings = data.get("equityHoldings", {})
        self._equity_holdings = self._comparison_frame(
            "Average", self._EQUITY_ROWS, _equity_holdings, _equity_holdings, "Cat")

        # bond holdings
        _bond_holdings = data.get("bondHoldings", {})
        self._bond_holdings = self._comparison_frame(
            "Average", self._BOND_ROWS, _bond_holdings, _bond_holdings, "Cat")

        # bond ratings: merge the list of small dicts into one dict
        self._bond_ratings = {key: d[key] for d in data.get("bondRatings", []) for key in d}

        # sector weightings: same list-of-dicts layout as the bond ratings
        self._sector_weightings = {key: d[key] for d in data.get("sectorWeightings", []) for key in d}

    def _parse_fund_profile(self, data):
        """
        Parses the fund profile from the data.

        Args:
            data: The data to parse.
        """
        self._fund_overview = {
            "categoryName": data.get("categoryName", None),
            "family": data.get("family", None),
            "legalType": data.get("legalType", None)
        }

        _fund_operations = data.get("feesExpensesInvestment", {})
        _fund_operations_cat = data.get("feesExpensesInvestmentCat", {})

        self._fund_operations = self._comparison_frame(
            "Attributes", self._OPERATION_ROWS, _fund_operations, _fund_operations_cat)
@@ -0,0 +1,241 @@
import curl_cffi
import pandas as pd
from yfinance import utils
from yfinance.config import YfConfig
from yfinance.const import _BASE_URL_
from yfinance.data import YfData
from yfinance.exceptions import YFDataException
# quoteSummary endpoint (no trailing slash); Holders._fetch appends "/<symbol>".
_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary"
class Holders:
    """Lazy accessors for the ownership and insider quoteSummary modules.

    The first access to a table that is still ``None`` triggers one request
    that populates all tables.
    """
    _SCRAPE_URL_ = 'https://finance.yahoo.com/quote'

    def __init__(self, data: YfData, symbol: str):
        """
        Args:
            data: The YfData object used for HTTP requests.
            symbol: The ticker symbol.
        """
        self._data = data
        self._symbol = symbol

        self._major = None
        self._major_direct_holders = None
        self._institutional = None
        self._mutualfund = None

        self._insider_transactions = None
        self._insider_purchases = None
        self._insider_roster = None

    @property
    def major(self) -> pd.DataFrame:
        """Major holders breakdown table."""
        if self._major is None:
            self._fetch_and_parse()
        return self._major

    @property
    def institutional(self) -> pd.DataFrame:
        """Institutional ownership table."""
        if self._institutional is None:
            self._fetch_and_parse()
        return self._institutional

    @property
    def mutualfund(self) -> pd.DataFrame:
        """Fund ownership table."""
        if self._mutualfund is None:
            self._fetch_and_parse()
        return self._mutualfund

    @property
    def insider_transactions(self) -> pd.DataFrame:
        """Insider transactions table."""
        if self._insider_transactions is None:
            self._fetch_and_parse()
        return self._insider_transactions

    @property
    def insider_purchases(self) -> pd.DataFrame:
        """Net insider share purchase activity table."""
        if self._insider_purchases is None:
            self._fetch_and_parse()
        return self._insider_purchases

    @property
    def insider_roster(self) -> pd.DataFrame:
        """Insider holders (roster) table."""
        if self._insider_roster is None:
            self._fetch_and_parse()
        return self._insider_roster

    def _fetch(self):
        """Request all holder-related quoteSummary modules for the symbol."""
        modules = ','.join(
            ["institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"])
        params_dict = {"modules": modules, "corsDomain": "finance.yahoo.com", "formatted": "false"}
        result = self._data.get_raw_json(f"{_QUOTE_SUMMARY_URL_}/{self._symbol}", params=params_dict)
        return result

    def _fetch_and_parse(self):
        """Fetch the holder modules and populate every table.

        On an HTTP error with ``YfConfig.debug.hide_exceptions`` set, the
        error is logged and every table becomes an empty DataFrame.

        Raises:
            YFDataException: If parsing fails with a KeyError/IndexError and
                ``YfConfig.debug.hide_exceptions`` is set (otherwise the
                original error propagates).
        """
        try:
            result = self._fetch()
        except curl_cffi.requests.exceptions.HTTPError as e:
            if not YfConfig.debug.hide_exceptions:
                raise
            utils.get_yf_logger().error(str(e) + e.response.text)

            self._major = pd.DataFrame()
            self._major_direct_holders = pd.DataFrame()
            self._institutional = pd.DataFrame()
            self._mutualfund = pd.DataFrame()
            self._insider_transactions = pd.DataFrame()
            self._insider_purchases = pd.DataFrame()
            self._insider_roster = pd.DataFrame()

            return

        try:
            data = result["quoteSummary"]["result"][0]
            # parse "institutionOwnership", "fundOwnership", "majorDirectHolders", "majorHoldersBreakdown", "insiderTransactions", "insiderHolders", "netSharePurchaseActivity"
            self._parse_institution_ownership(data.get("institutionOwnership", {}))
            self._parse_fund_ownership(data.get("fundOwnership", {}))
            # "majorDirectHolders" is deliberately not parsed yet:
            # need more data to investigate.
            self._parse_major_holders_breakdown(data.get("majorHoldersBreakdown", {}))
            self._parse_insider_transactions(data.get("insiderTransactions", {}))
            self._parse_insider_holders(data.get("insiderHolders", {}))
            self._parse_net_share_purchase_activity(data.get("netSharePurchaseActivity", {}))
        except (KeyError, IndexError):
            if not YfConfig.debug.hide_exceptions:
                raise
            raise YFDataException("Failed to parse holders json data.")

    @staticmethod
    def _parse_raw_values(data):
        """Return ``data["raw"]`` for a dict that has it, else ``data`` unchanged."""
        if isinstance(data, dict) and "raw" in data:
            return data["raw"]
        return data

    def _records_to_frame(self, records) -> pd.DataFrame:
        """Unwrap "raw" values in each record, drop "maxAge", build a frame.

        The records are modified in place. A record without "maxAge" is
        accepted rather than failing the whole parse.
        """
        for record in records:
            for key in list(record):
                record[key] = self._parse_raw_values(record[key])
            record.pop("maxAge", None)
        return pd.DataFrame(records)

    def _parse_institution_ownership(self, data):
        """Build the institutional ownership table from "ownershipList"."""
        # The empty-dict default is kept so the empty-frame result is unchanged.
        df = self._records_to_frame(data.get("ownershipList", {}))
        if not df.empty:
            df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s")
            df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True)  # "pctHeld": "% Out"
        self._institutional = df

    def _parse_fund_ownership(self, data):
        """Build the fund ownership table from "ownershipList"."""
        df = self._records_to_frame(data.get("ownershipList", {}))
        if not df.empty:
            df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s")
            df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "position": "Shares", "value": "Value"}, inplace=True)
        self._mutualfund = df

    def _parse_major_direct_holders(self, data):
        """Build the major direct holders table from "holders"."""
        df = self._records_to_frame(data.get("holders", {}))
        if not df.empty:
            df["reportDate"] = pd.to_datetime(df["reportDate"], unit="s")
            df.rename(columns={"reportDate": "Date Reported", "organization": "Holder", "positionDirect": "Shares", "valueDirect": "Value"}, inplace=True)
        self._major_direct_holders = df

    def _parse_major_holders_breakdown(self, data):
        """Build the one-column "Value" table keyed by breakdown field."""
        data.pop("maxAge", None)
        df = pd.DataFrame.from_dict(data, orient="index")
        if not df.empty:
            df.columns.name = "Breakdown"
            df.rename(columns={df.columns[0]: 'Value'}, inplace=True)
        self._major = df

    def _parse_insider_transactions(self, data):
        """Build the insider transactions table from "transactions"."""
        df = self._records_to_frame(data.get("transactions", {}))
        if not df.empty:
            df["startDate"] = pd.to_datetime(df["startDate"], unit="s")
            df.rename(columns={
                "startDate": "Start Date",
                "filerName": "Insider",
                "filerRelation": "Position",
                "filerUrl": "URL",
                "moneyText": "Transaction",
                "transactionText": "Text",
                "shares": "Shares",
                "value": "Value",
                "ownership": "Ownership"  # ownership flag, direct or institutional
            }, inplace=True)
        self._insider_transactions = df

    def _parse_insider_holders(self, data):
        """Build the insider roster table from "holders"."""
        df = self._records_to_frame(data.get("holders", {}))
        if not df.empty:
            if "positionDirectDate" in df:
                df["positionDirectDate"] = pd.to_datetime(df["positionDirectDate"], unit="s")
            if "latestTransDate" in df:
                df["latestTransDate"] = pd.to_datetime(df["latestTransDate"], unit="s")

            df.rename(columns={
                "name": "Name",
                "relation": "Position",
                "url": "URL",
                "transactionDescription": "Most Recent Transaction",
                "latestTransDate": "Latest Transaction Date",
                "positionDirectDate": "Position Direct Date",
                "positionDirect": "Shares Owned Directly",
                "positionIndirectDate": "Position Indirect Date",
                "positionIndirect": "Shares Owned Indirectly"
            }, inplace=True)

            # Cast only the text columns that are present, so one absent
            # field does not discard the whole payload via KeyError.
            for column in ("Name", "Position", "URL", "Most Recent Transaction"):
                if column in df:
                    df[column] = df[column].astype(str)

        self._insider_roster = df

    def _parse_net_share_purchase_activity(self, data):
        """Build the seven-row insider purchase summary table."""
        df = pd.DataFrame(
            {
                "Insider Purchases Last " + data.get("period", ""): [
                    "Purchases",
                    "Sales",
                    "Net Shares Purchased (Sold)",
                    "Total Insider Shares Held",
                    "% Net Shares Purchased (Sold)",
                    "% Buy Shares",
                    "% Sell Shares"
                ],
                "Shares": [
                    data.get('buyInfoShares'),
                    data.get('sellInfoShares'),
                    data.get('netInfoShares'),
                    data.get('totalInsiderShares'),
                    data.get('netPercentInsiderShares'),
                    data.get('buyPercentInsiderShares'),
                    data.get('sellPercentInsiderShares')
                ],
                "Trans": [
                    data.get('buyInfoCount'),
                    data.get('sellInfoCount'),
                    data.get('netInfoCount'),
                    pd.NA,
                    pd.NA,
                    pd.NA,
                    pd.NA
                ]
            }
        ).convert_dtypes()
        self._insider_purchases = df
@@ -0,0 +1,779 @@
import curl_cffi
import datetime
import json
import numpy as _np
import pandas as pd
from yfinance import utils
from yfinance.config import YfConfig
from yfinance.const import quote_summary_valid_modules, _BASE_URL_, _QUERY1_URL_
from yfinance.data import YfData
from yfinance.exceptions import YFDataException, YFException
# Keys that used to be served from info[], grouped by where the value comes from.
info_retired_keys_price = (
    {"currentPrice", "dayHigh", "dayLow", "open", "previousClose", "volume", "volume24Hr"}
    | {f"regularMarket{suffix}" for suffix in ("DayHigh", "DayLow", "Open", "PreviousClose", "Price", "Volume")}
    | {"fiftyTwoWeekLow", "fiftyTwoWeekHigh", "fiftyTwoWeekChange", "52WeekChange", "fiftyDayAverage", "twoHundredDayAverage"}
    | {"averageDailyVolume10Day", "averageVolume10days", "averageVolume"}
)
info_retired_keys_exchange = {"currency", "exchange", "exchangeTimezoneName", "exchangeTimezoneShortName", "quoteType"}
info_retired_keys_marketCap = {"marketCap"}
info_retired_keys_symbol = {"symbol"}
# Union of all groups above.
info_retired_keys = set().union(
    info_retired_keys_price,
    info_retired_keys_exchange,
    info_retired_keys_marketCap,
    info_retired_keys_symbol,
)
# quoteSummary endpoint, built from the shared base URL (no trailing slash).
_QUOTE_SUMMARY_URL_ = f"{_BASE_URL_}/v10/finance/quoteSummary"
class FastInfo:
# Contain small subset of info[] items that can be fetched faster elsewhere.
# Imitates a dict.
def __init__(self, tickerBaseObject):
self._tkr = tickerBaseObject
self._prices_1y = None
self._prices_1wk_1h_prepost = None
self._prices_1wk_1h_reg = None
self._md = None
self._currency = None
self._quote_type = None
self._exchange = None
self._timezone = None
self._shares = None
self._mcap = None
self._open = None
self._day_high = None
self._day_low = None
self._last_price = None
self._last_volume = None
self._prev_close = None
self._reg_prev_close = None
self._50d_day_average = None
self._200d_day_average = None
self._year_high = None
self._year_low = None
self._year_change = None
self._10d_avg_vol = None
self._3mo_avg_vol = None
# attrs = utils.attributes(self)
# self.keys = attrs.keys()
# utils.attributes is calling each method, bad! Have to hardcode
_properties = ["currency", "quote_type", "exchange", "timezone"]
_properties += ["shares", "market_cap"]
_properties += ["last_price", "previous_close", "open", "day_high", "day_low"]
_properties += ["regular_market_previous_close"]
_properties += ["last_volume"]
_properties += ["fifty_day_average", "two_hundred_day_average", "ten_day_average_volume", "three_month_average_volume"]
_properties += ["year_high", "year_low", "year_change"]
# Because released before fixing key case, need to officially support
# camel-case but also secretly support snake-case
base_keys = [k for k in _properties if '_' not in k]
sc_keys = [k for k in _properties if '_' in k]
self._sc_to_cc_key = {k: utils.snake_case_2_camelCase(k) for k in sc_keys}
self._cc_to_sc_key = {v: k for k, v in self._sc_to_cc_key.items()}
self._public_keys = sorted(base_keys + list(self._sc_to_cc_key.values()))
self._keys = sorted(self._public_keys + sc_keys)
# dict imitation:
def keys(self):
return self._public_keys
def items(self):
return [(k, self[k]) for k in self._public_keys]
def values(self):
return [self[k] for k in self._public_keys]
def get(self, key, default=None):
if key in self.keys():
if key in self._cc_to_sc_key:
key = self._cc_to_sc_key[key]
return self[key]
return default
def __getitem__(self, k):
if not isinstance(k, str):
raise KeyError(f"key must be a string not '{type(k)}'")
if k not in self._keys:
raise KeyError(f"'{k}' not valid key. Examine 'FastInfo.keys()'")
if k in self._cc_to_sc_key:
k = self._cc_to_sc_key[k]
return getattr(self, k)
def __contains__(self, k):
return k in self.keys()
def __iter__(self):
return iter(self.keys())
def __str__(self):
return "lazy-loading dict with keys = " + str(self.keys())
def __repr__(self):
return self.__str__()
def toJSON(self, indent=4):
return json.dumps({k: self[k] for k in self.keys()}, indent=indent)
def _get_1y_prices(self, fullDaysOnly=False):
if self._prices_1y is None:
self._prices_1y = self._tkr.history(period="1y", auto_adjust=False, keepna=True)
self._md = self._tkr.get_history_metadata()
try:
ctp = self._md["currentTradingPeriod"]
self._today_open = pd.to_datetime(ctp["regular"]["start"], unit='s', utc=True).tz_convert(self.timezone)
self._today_close = pd.to_datetime(ctp["regular"]["end"], unit='s', utc=True).tz_convert(self.timezone)
self._today_midnight = self._today_close.ceil("D")
except Exception:
self._today_open = None
self._today_close = None
self._today_midnight = None
raise
if self._prices_1y.empty:
return self._prices_1y
dnow = pd.Timestamp.now('UTC').tz_convert(self.timezone).date()
d1 = dnow
d0 = (d1 + datetime.timedelta(days=1)) - utils._interval_to_timedelta("1y")
if fullDaysOnly and self._exchange_open_now():
# Exclude today
d1 -= utils._interval_to_timedelta("1d")
return self._prices_1y.loc[str(d0):str(d1)]
def _get_1wk_1h_prepost_prices(self):
if self._prices_1wk_1h_prepost is None:
self._prices_1wk_1h_prepost = self._tkr.history(period="5d", interval="1h", auto_adjust=False, prepost=True)
return self._prices_1wk_1h_prepost
def _get_1wk_1h_reg_prices(self):
if self._prices_1wk_1h_reg is None:
self._prices_1wk_1h_reg = self._tkr.history(period="5d", interval="1h", auto_adjust=False, prepost=False)
return self._prices_1wk_1h_reg
def _get_exchange_metadata(self):
if self._md is not None:
return self._md
self._get_1y_prices()
self._md = self._tkr.get_history_metadata()
return self._md
def _exchange_open_now(self):
t = pd.Timestamp.now('UTC')
self._get_exchange_metadata()
# if self._today_open is None and self._today_close is None:
# r = False
# else:
# r = self._today_open <= t and t < self._today_close
# if self._today_midnight is None:
# r = False
# elif self._today_midnight.date() > t.tz_convert(self.timezone).date():
# r = False
# else:
# r = t < self._today_midnight
last_day_cutoff = self._get_1y_prices().index[-1] + datetime.timedelta(days=1)
last_day_cutoff += datetime.timedelta(minutes=20)
r = t < last_day_cutoff
# print("_exchange_open_now() returning", r)
return r
@property
def currency(self):
if self._currency is not None:
return self._currency
md = self._tkr.get_history_metadata()
self._currency = md["currency"]
return self._currency
@property
def quote_type(self):
if self._quote_type is not None:
return self._quote_type
md = self._tkr.get_history_metadata()
self._quote_type = md["instrumentType"]
return self._quote_type
@property
def exchange(self):
if self._exchange is not None:
return self._exchange
self._exchange = self._get_exchange_metadata()["exchangeName"]
return self._exchange
@property
def timezone(self):
if self._timezone is not None:
return self._timezone
self._timezone = self._get_exchange_metadata()["exchangeTimezoneName"]
return self._timezone
@property
def shares(self):
if self._shares is not None:
return self._shares
shares = self._tkr.get_shares_full(start=pd.Timestamp.now('UTC').date()-pd.Timedelta(days=548))
# if shares is None:
# # Requesting 18 months failed, so fallback to shares which should include last year
# shares = self._tkr.get_shares()
if shares is not None:
if isinstance(shares, pd.DataFrame):
shares = shares[shares.columns[0]]
self._shares = int(shares.iloc[-1])
return self._shares
@property
def last_price(self):
if self._last_price is not None:
return self._last_price
prices = self._get_1y_prices()
if prices.empty:
md = self._get_exchange_metadata()
if "regularMarketPrice" in md:
self._last_price = md["regularMarketPrice"]
else:
self._last_price = float(prices["Close"].iloc[-1])
if _np.isnan(self._last_price):
md = self._get_exchange_metadata()
if "regularMarketPrice" in md:
self._last_price = md["regularMarketPrice"]
return self._last_price
@property
def previous_close(self):
if self._prev_close is not None:
return self._prev_close
prices = self._get_1wk_1h_prepost_prices()
fail = False
if prices.empty:
fail = True
else:
prices = prices[["Close"]].groupby(prices.index.date).last()
if prices.shape[0] < 2:
# Very few symbols have previousClose despite no
# no trading data e.g. 'QCSTIX'.
fail = True
else:
self._prev_close = float(prices["Close"].iloc[-2])
if fail:
# Fallback to original info[] if available.
self._tkr.info # trigger fetch
k = "previousClose"
if self._tkr._quote._retired_info is not None and k in self._tkr._quote._retired_info:
self._prev_close = self._tkr._quote._retired_info[k]
return self._prev_close
@property
def regular_market_previous_close(self):
if self._reg_prev_close is not None:
return self._reg_prev_close
prices = self._get_1y_prices()
if prices.shape[0] == 1:
# Tiny % of tickers don't return daily history before last trading day,
# so backup option is hourly history:
prices = self._get_1wk_1h_reg_prices()
prices = prices[["Close"]].groupby(prices.index.date).last()
if prices.shape[0] < 2:
# Very few symbols have regularMarketPreviousClose despite no
# no trading data. E.g. 'QCSTIX'.
# So fallback to original info[] if available.
self._tkr.info # trigger fetch
k = "regularMarketPreviousClose"
if self._tkr._quote._retired_info is not None and k in self._tkr._quote._retired_info:
self._reg_prev_close = self._tkr._quote._retired_info[k]
else:
self._reg_prev_close = float(prices["Close"].iloc[-2])
return self._reg_prev_close
@property
def open(self):
if self._open is not None:
return self._open
prices = self._get_1y_prices()
if prices.empty:
self._open = None
else:
self._open = float(prices["Open"].iloc[-1])
if _np.isnan(self._open):
self._open = None
return self._open
@property
def day_high(self):
if self._day_high is not None:
return self._day_high
prices = self._get_1y_prices()
if prices.empty:
self._day_high = None
else:
self._day_high = float(prices["High"].iloc[-1])
if _np.isnan(self._day_high):
self._day_high = None
return self._day_high
@property
def day_low(self):
if self._day_low is not None:
return self._day_low
prices = self._get_1y_prices()
if prices.empty:
self._day_low = None
else:
self._day_low = float(prices["Low"].iloc[-1])
if _np.isnan(self._day_low):
self._day_low = None
return self._day_low
@property
def last_volume(self):
if self._last_volume is not None:
return self._last_volume
prices = self._get_1y_prices()
self._last_volume = None if prices.empty else int(prices["Volume"].iloc[-1])
return self._last_volume
@property
def fifty_day_average(self):
if self._50d_day_average is not None:
return self._50d_day_average
prices = self._get_1y_prices(fullDaysOnly=True)
if prices.empty:
self._50d_day_average = None
else:
n = prices.shape[0]
a = n-50
b = n
if a < 0:
a = 0
self._50d_day_average = float(prices["Close"].iloc[a:b].mean())
return self._50d_day_average
@property
def two_hundred_day_average(self):
if self._200d_day_average is not None:
return self._200d_day_average
prices = self._get_1y_prices(fullDaysOnly=True)
if prices.empty:
self._200d_day_average = None
else:
n = prices.shape[0]
a = n-200
b = n
if a < 0:
a = 0
self._200d_day_average = float(prices["Close"].iloc[a:b].mean())
return self._200d_day_average
@property
def ten_day_average_volume(self):
if self._10d_avg_vol is not None:
return self._10d_avg_vol
prices = self._get_1y_prices(fullDaysOnly=True)
if prices.empty:
self._10d_avg_vol = None
else:
n = prices.shape[0]
a = n-10
b = n
if a < 0:
a = 0
self._10d_avg_vol = int(prices["Volume"].iloc[a:b].mean())
return self._10d_avg_vol
@property
def three_month_average_volume(self):
    """Return the mean ``Volume`` over the trailing three-month window, as int.

    The window ends at the last index of the full-day price table and
    starts at that timestamp minus the "3mo" interval plus the "1d"
    interval (both converted by ``utils._interval_to_timedelta``).
    Returns ``None`` when the table is empty. The result is stored in
    ``self._3mo_avg_vol``.
    """
    cached = self._3mo_avg_vol
    if cached is not None:
        return cached
    history = self._get_1y_prices(fullDaysOnly=True)
    if history.empty:
        self._3mo_avg_vol = None
        return None
    window_end = history.index[-1]
    window_start = window_end - utils._interval_to_timedelta("3mo") + utils._interval_to_timedelta("1d")
    window_volumes = history.loc[window_start:window_end, "Volume"]
    self._3mo_avg_vol = int(window_volumes.mean())
    return self._3mo_avg_vol
@property
def year_high(self):
    """Return the maximum ``High`` of the price table as a float.

    Full-day prices are preferred; if that table is empty the table with
    ``fullDaysOnly=False`` is used instead. The result is stored in
    ``self._year_high``.
    """
    cached = self._year_high
    if cached is not None:
        return cached
    history = self._get_1y_prices(fullDaysOnly=True)
    if history.empty:
        # Fall back to the table that is not restricted to full days.
        history = self._get_1y_prices(fullDaysOnly=False)
    highest = history["High"].max()
    self._year_high = float(highest)
    return self._year_high
@property
def year_low(self):
    """Return the minimum ``Low`` of the price table as a float.

    Full-day prices are preferred; if that table is empty the table with
    ``fullDaysOnly=False`` is used instead. The result is stored in
    ``self._year_low``.
    """
    cached = self._year_low
    if cached is not None:
        return cached
    history = self._get_1y_prices(fullDaysOnly=True)
    if history.empty:
        # Fall back to the table that is not restricted to full days.
        history = self._get_1y_prices(fullDaysOnly=False)
    lowest = history["Low"].min()
    self._year_low = float(lowest)
    return self._year_low
@property
def year_change(self):
    """Return the relative change between the first and last ``Close``.

    Computed as ``(last - first) / first`` over the full-day price table
    and stored in ``self._year_change``. With fewer than two rows nothing
    is computed and the (still unset) cached value is returned.
    """
    if self._year_change is not None:
        return self._year_change
    history = self._get_1y_prices(fullDaysOnly=True)
    if history.shape[0] < 2:
        return self._year_change
    closes = history["Close"]
    first_close = closes.iloc[0]
    last_close = closes.iloc[-1]
    self._year_change = float((last_close - first_close) / first_close)
    return self._year_change
@property
def market_cap(self):
    """Return the market capitalisation, or ``None`` if it cannot be derived.

    When a share count is available the value is ``shares * last_price``
    as a float. Otherwise the ``marketCap`` entry of the ticker's retired
    info is used if present. The result is stored in ``self._mcap``.
    """
    if self._mcap is not None:
        return self._mcap

    try:
        share_count = self.shares
    except Exception as err:
        # Only the "no share count" failure is tolerated; anything else propagates.
        if "Cannot retrieve share count" not in str(err):
            raise
        share_count = None

    if share_count is not None:
        self._mcap = float(share_count * self.last_price)
        return self._mcap

    # Very few symbols have marketCap despite no share count (e.g. 'BTC-USD'),
    # so fall back to the original info[] if available.
    self._tkr.info  # accessed for its side effect of loading the info data
    retired = self._tkr._quote._retired_info
    if retired is not None and "marketCap" in retired:
        self._mcap = retired["marketCap"]
    return self._mcap
class Quote:
    def __init__(self, data: YfData, symbol: str):
        """Store the data accessor and symbol, and reset all lazy caches.

        Args:
            data: Accessor used by the fetch methods to issue requests.
            symbol: Ticker symbol the quote data is requested for.
        """
        self._data = data
        self._symbol = symbol

        # Lazily populated results; None means "not fetched yet".
        self._info = None
        self._retired_info = None
        self._sustainability = None
        self._recommendations = None
        self._upgrades_downgrades = None
        self._calendar = None
        self._sec_filings = None

        # One-shot guards for the fetch routines.
        self._already_scraped = False
        self._already_fetched = False
        self._already_fetched_complementary = False
@property
def info(self) -> dict:
    """Return the info dictionary, fetching it on first access.

    When ``self._info`` is unset, ``_fetch_info()`` and then
    ``_fetch_complementary()`` are called before returning it.
    """
    if self._info is not None:
        return self._info
    self._fetch_info()
    self._fetch_complementary()
    return self._info
@property
def sustainability(self) -> pd.DataFrame:
    """Return the ``esgScores`` module as a DataFrame, fetched on first access.

    An empty DataFrame is cached when the fetch returns ``None``.

    Raises:
        KeyError, IndexError: the response lacks the expected structure and
            ``YfConfig.debug.hide_exceptions`` is false.
        YFDataException: same condition with ``hide_exceptions`` true.
    """
    if self._sustainability is not None:
        return self._sustainability

    result = self._fetch(modules=['esgScores'])
    if result is None:
        self._sustainability = pd.DataFrame()
        return self._sustainability

    try:
        data = result["quoteSummary"]["result"][0]
    except (KeyError, IndexError):
        if not YfConfig.debug.hide_exceptions:
            raise
        raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}")
    self._sustainability = pd.DataFrame(data)
    return self._sustainability
@property
def recommendations(self) -> pd.DataFrame:
    """Return the recommendation trend as a DataFrame, fetched on first access.

    The frame is built from the ``trend`` list of the
    ``recommendationTrend`` module. An empty DataFrame is cached when the
    fetch returns ``None``.

    Raises:
        KeyError, IndexError: the response lacks the expected structure and
            ``YfConfig.debug.hide_exceptions`` is false.
        YFDataException: same condition with ``hide_exceptions`` true.
    """
    if self._recommendations is not None:
        return self._recommendations

    result = self._fetch(modules=['recommendationTrend'])
    if result is None:
        self._recommendations = pd.DataFrame()
        return self._recommendations

    try:
        summary = result["quoteSummary"]["result"][0]
        data = summary["recommendationTrend"]["trend"]
    except (KeyError, IndexError):
        if not YfConfig.debug.hide_exceptions:
            raise
        raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}")
    self._recommendations = pd.DataFrame(data)
    return self._recommendations
@property
def upgrades_downgrades(self) -> pd.DataFrame:
    """Return the upgrade/downgrade history as a DataFrame, fetched on first access.

    Columns are renamed to ``Firm``, ``ToGrade``, ``FromGrade`` and
    ``Action``; the index is ``GradeDate``, converted from epoch seconds
    to datetimes. An empty DataFrame is cached when the fetch returns
    ``None``.

    Raises:
        YFDataException: the history list is empty, or the response cannot
            be parsed while ``YfConfig.debug.hide_exceptions`` is true.
        KeyError, IndexError: the response cannot be parsed and
            ``hide_exceptions`` is false.
    """
    if self._upgrades_downgrades is not None:
        return self._upgrades_downgrades

    result = self._fetch(modules=['upgradeDowngradeHistory'])
    if result is None:
        self._upgrades_downgrades = pd.DataFrame()
        return self._upgrades_downgrades

    column_names = {
        "epochGradeDate": "GradeDate",
        'firm': 'Firm',
        'toGrade': 'ToGrade',
        'fromGrade': 'FromGrade',
        'action': 'Action',
    }
    try:
        data = result["quoteSummary"]["result"][0]["upgradeDowngradeHistory"]["history"]
        if len(data) == 0:
            raise YFDataException(f"No upgrade/downgrade history found for {self._symbol}")
        frame = pd.DataFrame(data).rename(columns=column_names)
        frame = frame.set_index('GradeDate')
        frame.index = pd.to_datetime(frame.index, unit='s')
        self._upgrades_downgrades = frame
    except (KeyError, IndexError):
        if not YfConfig.debug.hide_exceptions:
            raise
        raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}")
    return self._upgrades_downgrades
@property
def calendar(self) -> dict:
    """Return the calendar dictionary, calling ``_fetch_calendar()`` on first access."""
    if self._calendar is not None:
        return self._calendar
    self._fetch_calendar()
    return self._calendar
@property
def sec_filings(self) -> dict:
    """Return the SEC filings, fetched on first access.

    The result of ``_fetch_sec_filings()`` is cached; a ``None`` result is
    stored as an empty dict.
    """
    if self._sec_filings is not None:
        return self._sec_filings
    fetched = self._fetch_sec_filings()
    if fetched is None:
        fetched = {}
    self._sec_filings = fetched
    return self._sec_filings
@staticmethod
def valid_modules():
    """Return ``quote_summary_valid_modules``, the module names accepted by ``_fetch``."""
    return quote_summary_valid_modules
def _fetch(self, modules: list):
    """Request the given quote-summary modules for this symbol.

    Names not present in ``quote_summary_valid_modules`` are dropped
    before the request is made.

    Args:
        modules: List of module names to request.

    Returns:
        The value returned by ``self._data.get_raw_json``, or ``None`` when
        an HTTP error occurs and ``YfConfig.debug.hide_exceptions`` is true
        (the error is logged in that case).

    Raises:
        YFException: ``modules`` is not a list, or no valid name remains.
        curl_cffi.requests.exceptions.HTTPError: the request fails and
            ``hide_exceptions`` is false.
    """
    if not isinstance(modules, list):
        raise YFException("Should provide a list of modules, see available modules using `valid_modules`")

    accepted = [name for name in modules if name in quote_summary_valid_modules]
    modules = ','.join(accepted)
    if not modules:
        raise YFException("No valid modules provided, see available modules using `valid_modules`")

    params_dict = {
        "modules": modules,
        "corsDomain": "finance.yahoo.com",
        "formatted": "false",
        "symbol": self._symbol,
    }
    try:
        return self._data.get_raw_json(_QUOTE_SUMMARY_URL_ + f"/{self._symbol}", params=params_dict)
    except curl_cffi.requests.exceptions.HTTPError as e:
        if not YfConfig.debug.hide_exceptions:
            raise
        utils.get_yf_logger().error(str(e) + e.response.text)
        return None
def _fetch_additional_info(self):
    """Request the v7 quote endpoint for this symbol.

    Returns:
        The value returned by ``self._data.get_raw_json``, or ``None`` when
        an HTTP error occurs and ``YfConfig.debug.hide_exceptions`` is true
        (the error is logged in that case).

    Raises:
        curl_cffi.requests.exceptions.HTTPError: the request fails and
            ``hide_exceptions`` is false.
    """
    query_params = {"symbols": self._symbol, "formatted": "false"}
    endpoint = f"{_QUERY1_URL_}/v7/finance/quote?"
    try:
        return self._data.get_raw_json(endpoint, params=query_params)
    except curl_cffi.requests.exceptions.HTTPError as e:
        if not YfConfig.debug.hide_exceptions:
            raise
        utils.get_yf_logger().error(str(e) + e.response.text)
        return None
def _fetch_info(self):
    """Fetch, merge and normalise the info dictionary into ``self._info``.

    Runs at most once per instance (guarded by ``self._already_fetched``).
    The quote-summary response and the additional quote response are
    merged; either may be ``None`` (``_fetch`` / ``_fetch_additional_info``
    return ``None`` when an HTTP error is hidden). If both are missing,
    ``self._info`` becomes an empty dict.
    """
    if self._already_fetched:
        return
    self._already_fetched = True

    modules = ['financialData', 'quoteType', 'defaultKeyStatistics', 'assetProfile', 'summaryDetail']
    result = self._fetch(modules=modules)
    additional_info = self._fetch_additional_info()

    # Merge whichever responses are available. A missing additional response
    # must not discard a good quote-summary result, and two missing responses
    # must not crash the membership tests below.
    if result is None:
        result = additional_info
    elif additional_info is not None:
        result.update(additional_info)
    if result is None:
        result = {}

    query1_info = {}
    for section in ("quoteSummary", "quoteResponse"):
        # Tolerate an absent section or a null/absent "result" list.
        entries = (result.get(section) or {}).get("result") or []
        if entries:
            # Force the first entry to carry our symbol so it is the one selected.
            entries[0]["symbol"] = self._symbol
        query_info = next(
            (entry for entry in entries if entry["symbol"] == self._symbol),
            None,
        )
        if query_info:
            query1_info.update(query_info)

    # Normalize and flatten nested dictionaries while converting maxAge from days (1) to seconds (86400).
    # This handles Yahoo Finance API inconsistency where maxAge is sometimes expressed in days instead of seconds.
    processed_info = {}
    for k, v in query1_info.items():
        if isinstance(v, dict):
            # Nested dictionary: lift its non-None entries to the top level.
            for k1, v1 in v.items():
                if v1 is not None:
                    processed_info[k1] = 86400 if k1 == "maxAge" and v1 == 1 else v1
        elif v is not None:
            processed_info[k] = v
    query1_info = processed_info

    # recursively format but only because of 'companyOfficers'
    def _format(k, v):
        if isinstance(v, dict) and "raw" in v and "fmt" in v:
            # Time fields keep the formatted string; everything else the raw value.
            v2 = v["fmt"] if k in {"regularMarketTime", "postMarketTime"} else v["raw"]
        elif isinstance(v, list):
            v2 = [_format(None, x) for x in v]
        elif isinstance(v, dict):
            v2 = {k: _format(k, x) for k, x in v.items()}
        elif isinstance(v, str):
            # Replace non-breaking spaces with plain spaces.
            v2 = v.replace("\xa0", " ")
        else:
            v2 = v
        return v2

    self._info = {k: _format(k, v) for k, v in query1_info.items()}
def _fetch_complementary(self):
    """Add complementary key statistics to ``self._info``.

    Runs at most once per instance (guarded by
    ``self._already_fetched_complementary``). Currently only
    ``trailingPegRatio`` is requested. Each requested key is set to the
    most recent (last) raw reported value, or ``None`` when the response
    has no data for it.

    Raises:
        YFException: the response carries a non-null ``error`` field.
    """
    if self._already_fetched_complementary:
        return
    self._already_fetched_complementary = True

    self._fetch_info()
    if self._info is None:
        return

    # Complementary key-statistics. For now just want 'trailing PEG ratio'.
    # For just one/few variables it is faster to query the timeseries
    # endpoint directly than to scrape the key-statistics page.
    keys = {"trailingPegRatio"}
    url = f"https://query1.finance.yahoo.com/ws/fundamentals-timeseries/v1/finance/timeseries/{self._symbol}?symbol={self._symbol}"
    for k in keys:
        url += "&type=" + k

    # Request 6 months of data
    start = pd.Timestamp.now('UTC').floor("D") - datetime.timedelta(days=365 // 2)
    start = int(start.timestamp())
    end = pd.Timestamp.now('UTC').ceil("D")
    end = int(end.timestamp())
    url += f"&period1={start}&period2={end}"

    json_str = self._data.cache_get(url=url).text
    json_data = json.loads(json_str)
    json_result = json_data.get("timeseries") or json_data.get("finance")
    if json_result["error"] is not None:
        raise YFException("Failed to parse json response from Yahoo Finance: " + str(json_result["error"]))

    # An empty/missing result list means no data for any requested key.
    results = json_result["result"]
    keydict = results[0] if results else {}
    for k in keys:
        series = keydict.get(k)
        # Write to the backing dict directly rather than through the `info`
        # property; select the most recent (last) raw value when present.
        self._info[k] = series[-1]["reportedValue"]["raw"] if series else None
def _fetch_calendar(self):
    """Fetch the ``calendarEvents`` module and store it in ``self._calendar``.

    Dividend and earnings timestamps are converted to ``date`` objects via
    ``datetime.datetime.fromtimestamp``. When the fetch returns ``None``
    the calendar is set to an empty dict.

    Raises:
        KeyError, IndexError: the response lacks the expected structure and
            ``YfConfig.debug.hide_exceptions`` is false.
        YFDataException: same condition with ``hide_exceptions`` true.
    """
    # secFilings return too old data, so not requesting it for now
    result = self._fetch(modules=['calendarEvents'])
    if result is None:
        self._calendar = {}
        return

    date_fields = (
        ('dividendDate', 'Dividend Date'),
        ('exDividendDate', 'Ex-Dividend Date'),
    )
    estimate_fields = (
        ('Earnings High', 'earningsHigh'),
        ('Earnings Low', 'earningsLow'),
        ('Earnings Average', 'earningsAverage'),
        ('Revenue High', 'revenueHigh'),
        ('Revenue Low', 'revenueLow'),
        ('Revenue Average', 'revenueAverage'),
    )

    def _to_date(timestamp):
        return datetime.datetime.fromtimestamp(timestamp).date()

    try:
        self._calendar = dict()
        _events = result["quoteSummary"]["result"][0]["calendarEvents"]
        for source_key, label in date_fields:
            if source_key in _events:
                self._calendar[label] = _to_date(_events[source_key])
        # 'splitDate' is not handled yet; to be added if data for it is found.
        earnings = _events.get('earnings')
        if earnings is not None:
            self._calendar['Earnings Date'] = [_to_date(d) for d in earnings.get('earningsDate', [])]
            for label, source_key in estimate_fields:
                self._calendar[label] = earnings.get(source_key, None)
    except (KeyError, IndexError):
        if not YfConfig.debug.hide_exceptions:
            raise
        raise YFDataException(f"Failed to parse json response from Yahoo Finance: {result}")
def _fetch_sec_filings(self):
    """Fetch the ``secFilings`` module and return its list of filings.

    Each filing is modified in place: an ``exhibits`` list, when present,
    becomes a ``{type: url}`` dict, and ``date`` is parsed from
    ``YYYY-MM-DD`` into a ``date`` object.

    Returns:
        The list of filing dicts, or ``None`` when the fetch returns ``None``.
    """
    result = self._fetch(modules=['secFilings'])
    if result is None:
        return None

    filings = result["quoteSummary"]["result"][0]["secFilings"]["filings"]

    # Improve structure. (Converting the filings to a pandas DataFrame was
    # tried experimentally and is intentionally not done here.)
    for filing in filings:
        if 'exhibits' in filing:
            filing['exhibits'] = {exhibit['type']: exhibit['url'] for exhibit in filing['exhibits']}
        filing['date'] = datetime.datetime.strptime(filing['date'], '%Y-%m-%d').date()

    return filings
@@ -0,0 +1,4 @@
# Public API of the screener package. Every name listed in __all__ must be
# imported here, otherwise `from yfinance.screener import *` fails.
from .query import EquityQuery, FundQuery
from .screener import screen, PREDEFINED_SCREENER_QUERIES
__all__ = ['EquityQuery', 'FundQuery', 'screen', 'PREDEFINED_SCREENER_QUERIES']
@@ -0,0 +1,218 @@
from abc import ABC, abstractmethod
import numbers
from typing import List, Union, Dict, TypeVar, Tuple
from yfinance.const import EQUITY_SCREENER_EQ_MAP, EQUITY_SCREENER_FIELDS
from yfinance.const import FUND_SCREENER_EQ_MAP, FUND_SCREENER_FIELDS
from yfinance.exceptions import YFNotImplementedError
from ..utils import dynamic_docstring, generate_list_table_from_dict_universal
T = TypeVar('T', bound=Union[str, numbers.Real])
class QueryBase(ABC):
    """Base class for screener query expressions.

    A query is an operator plus a list of operands. Value operators
    (``EQ``, ``IS-IN``, ``BTWN``, ``GT``, ``LT``, ``GTE``, ``LTE``) take a
    field name followed by one or more values; logical operators (``AND``,
    ``OR``) take a list of other queries. Subclasses supply the permitted
    fields and restricted values via ``valid_fields`` and ``valid_values``.
    """

    def __init__(self, operator: str, operand: Union[ List['QueryBase'], Tuple[str, Tuple[Union[str, numbers.Real], ...]] ]):
        """Validate and store the operator and its operands.

        Args:
            operator: Operator name, case-insensitive.
            operand: Non-empty list of operands; shape depends on the operator.

        Raises:
            TypeError: ``operand`` is not a list, or a value has the wrong type.
            ValueError: ``operand`` is empty or has the wrong length, the
                operator is unknown, or a field/value is not permitted.
        """
        operator = operator.upper()

        if not isinstance(operand, list):
            raise TypeError('Invalid operand type')
        if len(operand) <= 0:
            raise ValueError(f'Operand list must not be empty for {type(self).__name__}')

        if operator == 'IS-IN':
            self._validate_isin_operand(operand)
        elif operator in {'OR','AND'}:
            self._validate_or_and_operand(operand)
        elif operator == 'EQ':
            self._validate_eq_operand(operand)
        elif operator == 'BTWN':
            self._validate_btwn_operand(operand)
        elif operator in {'GT','LT','GTE','LTE'}:
            self._validate_gt_lt(operand)
        else:
            raise ValueError('Invalid Operator Value')

        self.operator = operator
        self.operands = operand

    @property
    @abstractmethod
    def valid_fields(self) -> List:
        """Permitted field names; iterated via ``.values()`` during validation."""
        raise YFNotImplementedError('valid_fields() needs to be implemented by child')

    @property
    @abstractmethod
    def valid_values(self) -> Dict:
        """Mapping of field name to its restricted set of permitted values."""
        raise YFNotImplementedError('valid_values() needs to be implemented by child')

    def _require_valid_field(self, field) -> None:
        """Raise ValueError unless ``field`` appears in some group of ``valid_fields``."""
        if not any(field in fields_by_type for fields_by_type in self.valid_fields.values()):
            raise ValueError(f'Invalid field for {type(self)} "{field}"')

    def _restricted_values(self, field):
        """Return the permitted values for ``field``, or None if unrestricted."""
        if field not in self.valid_values:
            return None
        vv = self.valid_values[field]
        if isinstance(vv, dict):
            # this data structure is slightly different to generate better docs,
            # need to unpack here.
            vv = set().union(*vv.values())
        return vv

    def _validate_or_and_operand(self, operand: List['QueryBase']) -> None:
        """Require at least two operands, all of them queries."""
        if len(operand) <= 1:
            raise ValueError('Operand must be length longer than 1')
        if not all(isinstance(e, QueryBase) for e in operand):
            raise TypeError(f'Operand must be type {type(self)} for OR/AND')

    def _validate_eq_operand(self, operand: List[Union[str, numbers.Real]]) -> None:
        """Require ``[field, value]`` with a known field and a permitted value."""
        if len(operand) != 2:
            raise ValueError('Operand must be length 2 for EQ')
        self._require_valid_field(operand[0])
        allowed = self._restricted_values(operand[0])
        if allowed is not None and operand[1] not in allowed:
            raise ValueError(f'Invalid EQ value "{operand[1]}"')

    def _validate_btwn_operand(self, operand: List[Union[str, numbers.Real]]) -> None:
        """Require ``[field, low, high]`` with a known field and real bounds."""
        if len(operand) != 3:
            raise ValueError('Operand must be length 3 for BTWN')
        self._require_valid_field(operand[0])
        if not isinstance(operand[1], numbers.Real):
            raise TypeError('Invalid comparison type for BTWN')
        if not isinstance(operand[2], numbers.Real):
            raise TypeError('Invalid comparison type for BTWN')

    def _validate_gt_lt(self, operand: List[Union[str, numbers.Real]]) -> None:
        """Require ``[field, number]`` with a known field and a real value."""
        if len(operand) != 2:
            raise ValueError('Operand must be length 2 for GT/LT')
        self._require_valid_field(operand[0])
        if not isinstance(operand[1], numbers.Real):
            raise TypeError('Invalid comparison type for GT/LT')

    def _validate_isin_operand(self, operand: List[Union[str, numbers.Real]]) -> None:
        """Require ``[field, value, ...]`` with a known field and permitted values."""
        if len(operand) < 2:
            raise ValueError('Operand must be length 2+ for IS-IN')
        self._require_valid_field(operand[0])
        allowed = self._restricted_values(operand[0])
        if allowed is not None:
            for value in operand[1:]:
                if value not in allowed:
                    raise ValueError(f'Invalid IS-IN value "{value}"')

    def to_dict(self) -> Dict:
        """Return the query as a nested ``{"operator", "operands"}`` dict.

        ``IS-IN`` is expanded into an ``OR`` of ``EQ`` queries.
        """
        op = self.operator
        ops = self.operands
        if self.operator == 'IS-IN':
            # Expand to OR of EQ queries
            op = 'OR'
            ops = [type(self)('EQ', [self.operands[0], v]) for v in self.operands[1:]]
        return {
            "operator": op,
            "operands": [o.to_dict() if isinstance(o, QueryBase) else o for o in ops]
        }

    def __repr__(self, indent=0) -> str:
        """Return a readable representation; nested queries go on their own lines."""
        indent_str = " " * indent
        class_name = self.__class__.__name__

        has_nested = isinstance(self.operands, list) and any(
            isinstance(op, QueryBase) for op in self.operands
        )
        if has_nested:
            # If there are nested queries, format them with newlines
            operands_str = ",\n".join(
                f"{indent_str} {op.__repr__(indent + 1) if isinstance(op, QueryBase) else repr(op)}"
                for op in self.operands
            )
            return f"{class_name}({self.operator}, [\n{operands_str}\n{indent_str}])"
        # Simple operands (or a non-list operand) stay on one line
        return f"{class_name}({self.operator}, {repr(self.operands)})"

    def __str__(self) -> str:
        return self.__repr__()
class EquityQuery(QueryBase):
    """
    The `EquityQuery` class constructs filters for stocks based on specific criteria such as region, sector, exchange, and peer group.

    Start with value operations: `EQ` (equals), `IS-IN` (is in), `BTWN` (between), `GT` (greater than), `LT` (less than), `GTE` (greater or equal), `LTE` (less or equal).

    Combine them with logical operations: `AND`, `OR`.

    Example:
        Predefined Yahoo query `aggressive_small_caps`:

        .. code-block:: python

            from yfinance import EquityQuery

            EquityQuery('and', [
                EquityQuery('is-in', ['exchange', 'NMS', 'NYQ']),
                EquityQuery('lt', ["epsgrowth.lasttwelvemonths", 15])
            ])
    """

    @dynamic_docstring({"valid_operand_fields_table": generate_list_table_from_dict_universal(EQUITY_SCREENER_FIELDS)})
    @property
    def valid_fields(self) -> Dict:
        """
        Valid operands, grouped by category.
        {valid_operand_fields_table}
        """
        return EQUITY_SCREENER_FIELDS

    @dynamic_docstring({"valid_values_table": generate_list_table_from_dict_universal(EQUITY_SCREENER_EQ_MAP, concat_keys=['exchange', 'industry'])})
    @property
    def valid_values(self) -> Dict:
        """
        Most operands take number values, but some have a restricted set of valid values.
        {valid_values_table}
        """
        return EQUITY_SCREENER_EQ_MAP
class FundQuery(QueryBase):
    """
    The `FundQuery` class constructs filters for mutual funds based on specific criteria such as region, sector, exchange, and peer group.

    Start with value operations: `EQ` (equals), `IS-IN` (is in), `BTWN` (between), `GT` (greater than), `LT` (less than), `GTE` (greater or equal), `LTE` (less or equal).

    Combine them with logical operations: `AND`, `OR`.

    Example:
        Predefined Yahoo query `solid_large_growth_funds`:

        .. code-block:: python

            from yfinance import FundQuery

            FundQuery('and', [
                FundQuery('eq', ['categoryname', 'Large Growth']),
                FundQuery('is-in', ['performanceratingoverall', 4, 5]),
                FundQuery('lt', ['initialinvestment', 100001]),
                FundQuery('lt', ['annualreturnnavy1categoryrank', 50]),
                FundQuery('eq', ['exchange', 'NAS'])
            ])
    """

    @dynamic_docstring({"valid_operand_fields_table": generate_list_table_from_dict_universal(FUND_SCREENER_FIELDS)})
    @property
    def valid_fields(self) -> Dict:
        """
        Valid operands, grouped by category.
        {valid_operand_fields_table}
        """
        return FUND_SCREENER_FIELDS

    @dynamic_docstring({"valid_values_table": generate_list_table_from_dict_universal(FUND_SCREENER_EQ_MAP)})
    @property
    def valid_values(self) -> Dict:
        """
        Most operands take number values, but some have a restricted set of valid values.
        {valid_values_table}
        """
        return FUND_SCREENER_EQ_MAP
@@ -0,0 +1,205 @@
import curl_cffi
from typing import Union
import warnings
from json import dumps
from yfinance.const import _QUERY1_URL_
from yfinance.data import YfData
from ..utils import dynamic_docstring, generate_list_table_from_dict_universal
from .query import EquityQuery as EqyQy
from .query import FundQuery as FndQy
from .query import QueryBase, EquityQuery, FundQuery
# Endpoint that custom screener queries are POSTed to.
_SCREENER_URL_ = f"{_QUERY1_URL_}/v1/finance/screener"
# Endpoint used to run one of Yahoo's predefined (saved) screens by id.
_PREDEFINED_URL_ = f"{_SCREENER_URL_}/predefined/saved"
# Default paging/user fields for a predefined screen request body.
PREDEFINED_SCREENER_BODY_DEFAULTS = {
"offset":0, "count":25, "userId":"","userIdType":"guid"
}
# Local definitions of the predefined screens, keyed by screen name.
# Each entry carries the sort settings ("sortField", "sortType") and the
# equivalent "query" object; the first group uses equity queries (EqyQy),
# the second group fund queries (FndQy).
PREDEFINED_SCREENER_QUERIES = {
# --- Equity screens ---
'aggressive_small_caps': {"sortField":"eodvolume", "sortType":"desc",
"query": EqyQy('and', [EqyQy('is-in', ['exchange', 'NMS', 'NYQ']), EqyQy('lt', ["epsgrowth.lasttwelvemonths", 15])])},
'day_gainers': {"sortField":"percentchange", "sortType":"DESC",
"query": EqyQy('and', [EqyQy('gt', ['percentchange', 3]), EqyQy('eq', ['region', 'us']), EqyQy('gte', ['intradaymarketcap', 2000000000]), EqyQy('gte', ['intradayprice', 5]), EqyQy('gt', ['dayvolume', 15000])])},
'day_losers': {"sortField":"percentchange", "sortType":"ASC",
"query": EqyQy('and', [EqyQy('lt', ['percentchange', -2.5]), EqyQy('eq', ['region', 'us']), EqyQy('gte', ['intradaymarketcap', 2000000000]), EqyQy('gte', ['intradayprice', 5]), EqyQy('gt', ['dayvolume', 20000])])},
'growth_technology_stocks': {"sortField":"eodvolume", "sortType":"desc",
"query": EqyQy('and', [EqyQy('gte', ['quarterlyrevenuegrowth.quarterly', 25]), EqyQy('gte', ['epsgrowth.lasttwelvemonths', 25]), EqyQy('eq', ['sector', 'Technology']), EqyQy('is-in', ['exchange', 'NMS', 'NYQ'])])},
'most_actives': {"sortField":"dayvolume", "sortType":"DESC",
"query": EqyQy('and', [EqyQy('eq', ['region', 'us']), EqyQy('gte', ['intradaymarketcap', 2000000000]), EqyQy('gt', ['dayvolume', 5000000])])},
'most_shorted_stocks': {"count":25, "offset":0, "sortField":"short_percentage_of_shares_outstanding.value", "sortType":"DESC",
"query": EqyQy('and', [EqyQy('eq', ['region', 'us']), EqyQy('gt', ['intradayprice', 1]), EqyQy('gt', ['avgdailyvol3m', 200000])])},
'small_cap_gainers': {"sortField":"eodvolume", "sortType":"desc",
"query": EqyQy("and", [EqyQy("lt", ["intradaymarketcap",2000000000]), EqyQy("is-in", ["exchange", "NMS", "NYQ"])])},
'undervalued_growth_stocks': {"sortType":"DESC", "sortField":"eodvolume",
"query": EqyQy('and', [EqyQy('btwn', ['peratio.lasttwelvemonths', 0, 20]), EqyQy('lt', ['pegratio_5y', 1]), EqyQy('gte', ['epsgrowth.lasttwelvemonths', 25]), EqyQy('is-in', ['exchange', 'NMS', 'NYQ'])])},
'undervalued_large_caps': {"sortField":"eodvolume", "sortType":"desc",
"query": EqyQy('and', [EqyQy('btwn', ['peratio.lasttwelvemonths', 0, 20]), EqyQy('lt', ['pegratio_5y', 1]), EqyQy('btwn', ['intradaymarketcap', 10000000000, 100000000000]), EqyQy('is-in', ['exchange', 'NMS', 'NYQ'])])},
# --- Fund screens ---
'conservative_foreign_funds': {"sortType":"DESC", "sortField":"fundnetassets",
"query": FndQy('and', [FndQy('is-in', ['categoryname', 'Foreign Large Value', 'Foreign Large Blend', 'Foreign Large Growth', 'Foreign Small/Mid Growth', 'Foreign Small/Mid Blend', 'Foreign Small/Mid Value']), FndQy('is-in', ['performanceratingoverall', 4, 5]), FndQy('lt', ['initialinvestment', 100001]), FndQy('lt', ['annualreturnnavy1categoryrank', 50]), FndQy('is-in', ['riskratingoverall', 1, 2, 3]), FndQy('eq', ['exchange', 'NAS'])])},
'high_yield_bond': {"sortType":"DESC", "sortField":"fundnetassets",
"query": FndQy('and', [FndQy('is-in', ['performanceratingoverall', 4, 5]), FndQy('lt', ['initialinvestment', 100001]), FndQy('lt', ['annualreturnnavy1categoryrank', 50]), FndQy('is-in', ['riskratingoverall', 1, 2, 3]), FndQy('eq', ['categoryname', 'High Yield Bond']), FndQy('eq', ['exchange', 'NAS'])])},
'portfolio_anchors': {"sortType":"DESC", "sortField":"fundnetassets",
"query": FndQy('and', [FndQy('eq', ['categoryname', 'Large Blend']), FndQy('is-in', ['performanceratingoverall', 4, 5]), FndQy('lt', ['initialinvestment', 100001]), FndQy('lt', ['annualreturnnavy1categoryrank', 50]), FndQy('eq', ['exchange', 'NAS'])])},
'solid_large_growth_funds': {"sortType":"DESC", "sortField":"fundnetassets",
"query": FndQy('and', [FndQy('eq', ['categoryname', 'Large Growth']), FndQy('is-in', ['performanceratingoverall', 4, 5]), FndQy('lt', ['initialinvestment', 100001]), FndQy('lt', ['annualreturnnavy1categoryrank', 50]), FndQy('eq', ['exchange', 'NAS'])])},
'solid_midcap_growth_funds': {"sortType":"DESC", "sortField":"fundnetassets",
"query": FndQy('and', [FndQy('eq', ['categoryname', 'Mid-Cap Growth']), FndQy('is-in', ['performanceratingoverall', 4, 5]), FndQy('lt', ['initialinvestment', 100001]), FndQy('lt', ['annualreturnnavy1categoryrank', 50]), FndQy('eq', ['exchange', 'NAS'])])},
'top_mutual_funds': {"sortType":"DESC", "sortField":"percentchange",
"query": FndQy('and', [FndQy('gt', ['intradayprice', 15]), FndQy('is-in', ['performanceratingoverall', 4, 5]), FndQy('gt', ['initialinvestment', 1000]), FndQy('eq', ['exchange', 'NAS'])])}
}
@dynamic_docstring({"predefined_screeners": generate_list_table_from_dict_universal(PREDEFINED_SCREENER_QUERIES, bullets=True, title='Predefined queries (Dec-2024)')})
def screen(query: Union[str, EquityQuery, FundQuery],
           offset: int = None,
           size: int = None,
           count: int = None,
           sortField: str = None,
           sortAsc: bool = None,
           userId: str = None,
           userIdType: str = None,
           session = None):
    """
    Run a screen: predefined query, or custom query.

    :Parameters:
        * Defaults only apply if query = EquityQuery or FundQuery
        query : str | Query:
            The query to execute, either name of predefined or custom query.
            For predefined list run yf.PREDEFINED_SCREENER_QUERIES.keys()
        offset : int
            The offset for the results. Default 0.
        size : int
            number of results to return. Default 100, maximum 250 (Yahoo)
            Use count instead for predefined queries.
        count : int
            number of results to return. Default 25, maximum 250 (Yahoo)
            Use size instead for custom queries.
        sortField : str
            field to sort by. Default "ticker"
        sortAsc : bool
            Sort ascending? Default False
        userId : str
            The user ID. Default empty.
        userIdType : str
            Type of user ID (e.g., "guid"). Default "guid".

    Example: predefined query
        .. code-block:: python

            import yfinance as yf
            response = yf.screen("aggressive_small_caps")

    Example: custom query
        .. code-block:: python

            import yfinance as yf
            from yfinance import EquityQuery
            q = EquityQuery('and', [
                EquityQuery('gt', ['percentchange', 3]),
                EquityQuery('eq', ['region', 'us'])
            ])
            response = yf.screen(q, sortField = 'percentchange', sortAsc = True)

    To access predefineds query code
        .. code-block:: python

            import yfinance as yf
            query = yf.PREDEFINED_SCREENER_QUERIES['aggressive_small_caps']

    {predefined_screeners}
    """
    _data = YfData(session=session)

    # Only use defaults when user NOT give a predefined, because
    # Yahoo's predefined endpoint auto-applies defaults. Also,
    # that endpoint might be ignoring these fields.
    defaults = {
        'offset': 0,
        'count': 25,
        'sortField': 'ticker',
        'sortAsc': False,
        'userId': "",
        'userIdType': "guid"
    }

    if count is not None and count > 250:
        raise ValueError("Yahoo limits query count to 250, reduce count.")
    if size is not None and size > 250:
        raise ValueError("Yahoo limits query size to 250, reduce size.")

    if offset is not None and isinstance(query, str):
        # offset ignored by predefined API so switch to other API
        predefined = PREDEFINED_SCREENER_QUERIES[query]
        query = predefined['query']
        # use predefined's attributes if user not specified
        if sortField is None:
            sortField = predefined['sortField']
        if sortAsc is None:
            sortAsc = predefined['sortType'].lower() == 'asc'
        # and don't use defaults
        defaults = {}

    fields = {'offset': offset, 'count': count, "size": size, 'sortField': sortField, 'sortAsc': sortAsc, 'userId': userId, 'userIdType': userIdType}
    params_dict = {"corsDomain": "finance.yahoo.com", "formatted": "false", "lang": "en-US", "region": "US"}

    if isinstance(query, str):
        # Predefined screen by name: use Yahoo's predefined endpoint.
        if size is not None:
            warnings.warn("Screen 'size' argument is deprecated for predefined screens, set 'count' instead.", DeprecationWarning, stacklevel=2)
            # 'size' takes the place of 'count' for this endpoint.
            fields['count'] = fields['size']
        del fields['size']

        params_dict['scrIds'] = query
        params_dict.update({name: value for name, value in fields.items() if value is not None})

        resp = _data.get(url=_PREDEFINED_URL_, params=params_dict)
        try:
            resp.raise_for_status()
        except curl_cffi.requests.exceptions.HTTPError:
            if query not in PREDEFINED_SCREENER_QUERIES:
                print(f"yfinance.screen: '{query}' is probably not a predefined query.")
            raise
        return resp.json()["finance"]["result"][0]

    if not isinstance(query, QueryBase):
        raise ValueError(f'Query must be type str or QueryBase, not "{type(query)}"')

    # Custom query: fill unset fields from the defaults, then build the POST body.
    for name, fallback in defaults.items():
        if fields.get(name) is None:
            fields[name] = fallback
    fields['sortType'] = 'ASC' if fields.pop('sortAsc') else 'DESC'

    post_query = fields
    post_query['query'] = query

    if query is None:
        raise ValueError('No query provided')

    body_query = post_query['query']
    if isinstance(body_query, EqyQy):
        post_query['quoteType'] = 'EQUITY'
    elif isinstance(body_query, FndQy):
        post_query['quoteType'] = 'MUTUALFUND'
    post_query['query'] = body_query.to_dict()

    data = dumps(post_query, separators=(",", ":"), ensure_ascii=False)

    # Fetch
    response = _data.post(_SCREENER_URL_,
                          data=data,
                          params=params_dict)
    response.raise_for_status()
    return response.json()['finance']['result'][0]
@@ -0,0 +1,159 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import json as _json
from . import utils
from .config import YfConfig
from .const import _BASE_URL_
from .data import YfData
from .exceptions import YFDataException
class Search:
def __init__(self, query, max_results=8, news_count=8, lists_count=8, include_cb=True, include_nav_links=False,
             include_research=False, include_cultural_assets=False, enable_fuzzy_query=False, recommended=8,
             session=None, timeout=30, raise_errors=True):
    """
    Fetches and organizes search results from Yahoo Finance, including stock quotes and news articles.

    The search is executed immediately at the end of construction.

    Args:
        query: The search query (ticker symbol or company name).
        max_results: Maximum number of stock quotes to return (default 8).
        news_count: Number of news articles to include (default 8).
        lists_count: Number of lists to include (default 8).
        include_cb: Include the company breakdown (default True).
        include_nav_links: Include the navigation links (default False).
        include_research: Include the research reports (default False).
        include_cultural_assets: Include the cultural assets (default False).
        enable_fuzzy_query: Enable fuzzy search for typos (default False).
        recommended: Recommended number of results to return (default 8).
        session: Custom HTTP session for requests (default None).
        timeout: Request timeout in seconds (default 30).
        raise_errors: Raise exceptions on error (default True).
    """
    # Transport
    self.session = session
    self._data = YfData(session=self.session)

    # Request settings
    self.query = query
    self.max_results = max_results
    self.enable_fuzzy_query = enable_fuzzy_query
    self.news_count = news_count
    self.timeout = timeout
    self.raise_errors = raise_errors
    self.lists_count = lists_count
    self.include_cb = include_cb
    self.nav_links = include_nav_links
    self.enable_research = include_research
    self.enable_cultural_assets = include_cultural_assets
    self.recommended = recommended

    self._logger = utils.get_yf_logger()

    # Result holders, filled by search()
    self._response = {}
    self._all = {}
    self._quotes = []
    self._news = []
    self._lists = []
    self._research = []
    self._nav = []

    self.search()
def search(self) -> 'Search':
"""Search using the query parameters defined in the constructor."""
url = f"{_BASE_URL_}/v1/finance/search"
params = {
"q": self.query,
"quotesCount": self.max_results,
"enableFuzzyQuery": self.enable_fuzzy_query,
"newsCount": self.news_count,
"quotesQueryId": "tss_match_phrase_query",
"newsQueryId": "news_cie_vespa",
"listsCount": self.lists_count,
"enableCb": self.include_cb,
"enableNavLinks": self.nav_links,
"enableResearchReports": self.enable_research,
"enableCulturalAssets": self.enable_cultural_assets,
"recommendedCount": self.recommended
}
self._logger.debug(f'{self.query}: Yahoo GET parameters: {str(dict(params))}')
data = self._data.cache_get(url=url, params=params, timeout=self.timeout)
if data is None or "Will be right back" in data.text:
raise YFDataException("*** YAHOO! FINANCE IS CURRENTLY DOWN! ***")
try:
data = data.json()
except _json.JSONDecodeError:
if not YfConfig.debug.hide_exceptions:
raise
self._logger.error(f"{self.query}: 'search' fetch received faulty data")
data = {}
self._response = data
# Filter quotes to only include symbols
self._quotes = [quote for quote in data.get("quotes", []) if "symbol" in quote]
self._news = data.get("news", [])
self._lists = data.get("lists", [])
self._research = data.get("researchReports", [])
self._nav = data.get("nav", [])
self._all = {"quotes": self._quotes, "news": self._news, "lists": self._lists, "research": self._research,
"nav": self._nav}
return self
@property
def quotes(self) -> 'list':
"""Get the quotes from the search results."""
return self._quotes
@property
def news(self) -> 'list':
"""Get the news from the search results."""
return self._news
@property
def lists(self) -> 'list':
"""Get the lists from the search results."""
return self._lists
@property
def research(self) -> 'list':
"""Get the research reports from the search results."""
return self._research
@property
def nav(self) -> 'list':
"""Get the navigation links from the search results."""
return self._nav
@property
def all(self) -> 'dict[str,list]':
"""Get all the results from the search results: filtered down version of response."""
return self._all
@property
def response(self) -> 'dict':
"""Get the raw response from the search results."""
return self._response
@@ -0,0 +1,26 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Module-level mutable state, created empty when this module is imported.
# NOTE(review): nothing in this module reads or writes these names. Judging by
# the names alone they are presumably scratch space shared with the download
# code (per-ticker frames, a progress bar handle, error messages, tracebacks,
# ISIN lookups) — confirm against the modules that import this one.
_DFS = {}
_PROGRESS_BAR = None
_ERRORS = {}
_TRACEBACKS = {}
_ISINS = {}
@@ -0,0 +1,324 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
from collections import namedtuple as _namedtuple
import pandas as _pd
from .base import TickerBase
from .const import _BASE_URL_
from .scrapers.funds import FundsData
class Ticker(TickerBase):
    """A single ticker: option-chain access plus property shortcuts.

    Almost every property here is a thin, argument-free wrapper around the
    matching ``get_*`` method inherited from ``TickerBase``.
    """

    # Result type of option_chain(); built once so every call returns
    # instances of the same class.
    _OptionChain = _namedtuple('Options', ['calls', 'puts', 'underlying'])

    def __init__(self, ticker, session=None):
        """Create the ticker and initialise the empty option caches.

        Args:
            ticker: Symbol, forwarded to ``TickerBase``.
            session: Optional session object, forwarded to ``TickerBase``.
        """
        super().__init__(ticker, session=session)
        # Maps 'YYYY-MM-DD' -> raw expiration value as sent by Yahoo.
        self._expirations = {}
        # The 'quote' dict from the most recent option-chain download.
        self._underlying = {}

    def __repr__(self):
        return f'yfinance.Ticker object <{self.ticker}>'

    def _download_options(self, date=None):
        """Fetch one option chain and refresh the expiration/underlying caches.

        Args:
            date: Raw expiration value to request; ``None`` requests the
                endpoint's default chain.

        Returns:
            The first entry of the response's ``options`` list with an extra
            ``underlying`` key, or ``{}`` when the response holds no chain.
        """
        if date is None:
            url = f"{_BASE_URL_}/v7/finance/options/{self.ticker}"
        else:
            url = f"{_BASE_URL_}/v7/finance/options/{self.ticker}?date={date}"

        payload = self._data.get(url=url).json()

        # `or` fallbacks: a key that is present but null must behave like a
        # missing key instead of blowing up in len()/indexing.
        chains = (payload.get('optionChain') or {}).get('result') or []
        if not chains:
            return {}

        chain = chains[0]
        for exp in chain['expirationDates']:
            self._expirations[_pd.Timestamp(exp, unit='s').strftime('%Y-%m-%d')] = exp

        self._underlying = chain.get('quote', {})

        contracts = chain.get('options') or []
        if not contracts:
            return {}
        # Dict display instead of dict(**..., underlying=...): the latter
        # raises TypeError if the chain already carries an 'underlying' key.
        return {**contracts[0], 'underlying': self._underlying}

    def _options2df(self, opt, tz=None):
        """Turn a list of option contracts into a DataFrame with fixed columns.

        Args:
            opt: Contract records accepted by ``pandas.DataFrame``.
            tz: If given, ``lastTradeDate`` is converted to this timezone;
                otherwise it stays in UTC.

        Returns:
            DataFrame restricted to (and ordered by) the columns listed below;
            columns absent from the input are added by ``reindex``.
        """
        data = _pd.DataFrame(opt).reindex(columns=[
            'contractSymbol',
            'lastTradeDate',
            'strike',
            'lastPrice',
            'bid',
            'ask',
            'change',
            'percentChange',
            'volume',
            'openInterest',
            'impliedVolatility',
            'inTheMoney',
            'contractSize',
            'currency'])

        # Values are interpreted as epoch seconds and made UTC-aware.
        data['lastTradeDate'] = _pd.to_datetime(
            data['lastTradeDate'], unit='s', utc=True)
        if tz is not None:
            data['lastTradeDate'] = data['lastTradeDate'].dt.tz_convert(tz)
        return data

    def option_chain(self, date=None, tz=None):
        """Return the calls, puts and underlying quote for one expiration.

        Args:
            date: Expiration as a ``'YYYY-MM-DD'`` string taken from
                :attr:`options`; ``None`` uses the endpoint's default chain.
            tz: Optional timezone for the ``lastTradeDate`` column.

        Returns:
            Named tuple ``Options(calls, puts, underlying)``. ``calls`` and
            ``puts`` are DataFrames and ``underlying`` is a dict; all three
            are ``None`` when no chain could be downloaded.

        Raises:
            ValueError: ``date`` is not one of the known expirations.
        """
        if date is None:
            options = self._download_options()
        else:
            # Expirations are loaded lazily by the first download.
            if not self._expirations:
                self._download_options()
            if date not in self._expirations:
                raise ValueError(
                    f"Expiration `{date}` cannot be found. "
                    f"Available expirations are: [{', '.join(self._expirations)}]")
            date = self._expirations[date]
            options = self._download_options(date)

        if not options:
            return self._OptionChain(calls=None, puts=None, underlying=None)

        return self._OptionChain(
            calls=self._options2df(options['calls'], tz=tz),
            puts=self._options2df(options['puts'], tz=tz),
            underlying=options['underlying'],
        )

    # ------------------------
    # Property shortcuts for the inherited get_*() methods.

    @property
    def isin(self):
        return self.get_isin()

    # -- holders / insiders --

    @property
    def major_holders(self) -> _pd.DataFrame:
        return self.get_major_holders()

    @property
    def institutional_holders(self) -> _pd.DataFrame:
        return self.get_institutional_holders()

    @property
    def mutualfund_holders(self) -> _pd.DataFrame:
        return self.get_mutualfund_holders()

    @property
    def insider_purchases(self) -> _pd.DataFrame:
        return self.get_insider_purchases()

    @property
    def insider_transactions(self) -> _pd.DataFrame:
        return self.get_insider_transactions()

    @property
    def insider_roster_holders(self) -> _pd.DataFrame:
        return self.get_insider_roster_holders()

    # -- corporate actions / shares --

    @property
    def dividends(self) -> _pd.Series:
        return self.get_dividends()

    @property
    def capital_gains(self) -> _pd.Series:
        return self.get_capital_gains()

    @property
    def splits(self) -> _pd.Series:
        return self.get_splits()

    @property
    def actions(self) -> _pd.DataFrame:
        return self.get_actions()

    @property
    def shares(self) -> _pd.DataFrame:
        return self.get_shares()

    # -- general info --

    @property
    def info(self) -> dict:
        return self.get_info()

    @property
    def fast_info(self):
        return self.get_fast_info()

    @property
    def calendar(self) -> dict:
        """
        Returns a dictionary of events, earnings, and dividends for the ticker
        """
        return self.get_calendar()

    @property
    def sec_filings(self) -> dict:
        return self.get_sec_filings()

    # -- analyst recommendations --

    @property
    def recommendations(self):
        return self.get_recommendations()

    @property
    def recommendations_summary(self):
        return self.get_recommendations_summary()

    @property
    def upgrades_downgrades(self):
        return self.get_upgrades_downgrades()

    # -- earnings --

    @property
    def earnings(self) -> _pd.DataFrame:
        return self.get_earnings()

    @property
    def quarterly_earnings(self) -> _pd.DataFrame:
        return self.get_earnings(freq='quarterly')

    # -- income statement (plus `incomestmt` / `financials` aliases) --

    @property
    def income_stmt(self) -> _pd.DataFrame:
        return self.get_income_stmt(pretty=True)

    @property
    def quarterly_income_stmt(self) -> _pd.DataFrame:
        return self.get_income_stmt(pretty=True, freq='quarterly')

    @property
    def ttm_income_stmt(self) -> _pd.DataFrame:
        return self.get_income_stmt(pretty=True, freq='trailing')

    @property
    def incomestmt(self) -> _pd.DataFrame:
        """Alias of :attr:`income_stmt`."""
        return self.income_stmt

    @property
    def quarterly_incomestmt(self) -> _pd.DataFrame:
        """Alias of :attr:`quarterly_income_stmt`."""
        return self.quarterly_income_stmt

    @property
    def ttm_incomestmt(self) -> _pd.DataFrame:
        """Alias of :attr:`ttm_income_stmt`."""
        return self.ttm_income_stmt

    @property
    def financials(self) -> _pd.DataFrame:
        """Alias of :attr:`income_stmt`."""
        return self.income_stmt

    @property
    def quarterly_financials(self) -> _pd.DataFrame:
        """Alias of :attr:`quarterly_income_stmt`."""
        return self.quarterly_income_stmt

    @property
    def ttm_financials(self) -> _pd.DataFrame:
        """Alias of :attr:`ttm_income_stmt`."""
        return self.ttm_income_stmt

    # -- balance sheet (plus `balancesheet` aliases) --

    @property
    def balance_sheet(self) -> _pd.DataFrame:
        return self.get_balance_sheet(pretty=True)

    @property
    def quarterly_balance_sheet(self) -> _pd.DataFrame:
        return self.get_balance_sheet(pretty=True, freq='quarterly')

    @property
    def balancesheet(self) -> _pd.DataFrame:
        """Alias of :attr:`balance_sheet`."""
        return self.balance_sheet

    @property
    def quarterly_balancesheet(self) -> _pd.DataFrame:
        """Alias of :attr:`quarterly_balance_sheet`."""
        return self.quarterly_balance_sheet

    # -- cash flow (plus `cashflow` aliases) --

    @property
    def cash_flow(self) -> _pd.DataFrame:
        return self.get_cash_flow(pretty=True, freq="yearly")

    @property
    def quarterly_cash_flow(self) -> _pd.DataFrame:
        return self.get_cash_flow(pretty=True, freq='quarterly')

    @property
    def ttm_cash_flow(self) -> _pd.DataFrame:
        return self.get_cash_flow(pretty=True, freq='trailing')

    @property
    def cashflow(self) -> _pd.DataFrame:
        """Alias of :attr:`cash_flow`."""
        return self.cash_flow

    @property
    def quarterly_cashflow(self) -> _pd.DataFrame:
        """Alias of :attr:`quarterly_cash_flow`."""
        return self.quarterly_cash_flow

    @property
    def ttm_cashflow(self) -> _pd.DataFrame:
        """Alias of :attr:`ttm_cash_flow`."""
        return self.ttm_cash_flow

    # -- analyst estimates --

    @property
    def analyst_price_targets(self) -> dict:
        return self.get_analyst_price_targets()

    @property
    def earnings_estimate(self) -> _pd.DataFrame:
        return self.get_earnings_estimate()

    @property
    def revenue_estimate(self) -> _pd.DataFrame:
        return self.get_revenue_estimate()

    @property
    def earnings_history(self) -> _pd.DataFrame:
        return self.get_earnings_history()

    @property
    def eps_trend(self) -> _pd.DataFrame:
        return self.get_eps_trend()

    @property
    def eps_revisions(self) -> _pd.DataFrame:
        return self.get_eps_revisions()

    @property
    def growth_estimates(self) -> _pd.DataFrame:
        return self.get_growth_estimates()

    # -- miscellaneous --

    @property
    def sustainability(self) -> _pd.DataFrame:
        return self.get_sustainability()

    @property
    def options(self) -> tuple:
        """Known expiration dates as ``'YYYY-MM-DD'`` strings.

        Triggers an option-chain download the first time, while the
        expiration cache is still empty.
        """
        if not self._expirations:
            self._download_options()
        return tuple(self._expirations)

    @property
    def news(self) -> list:
        return self.get_news()

    @property
    def earnings_dates(self) -> _pd.DataFrame:
        return self.get_earnings_dates()

    @property
    def history_metadata(self) -> dict:
        return self.get_history_metadata()

    @property
    def funds_data(self) -> FundsData:
        return self.get_funds_data()
@@ -0,0 +1,99 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# yfinance - market data downloader
# https://github.com/ranaroussi/yfinance
#
# Copyright 2017-2019 Ran Aroussi
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
from __future__ import print_function
from . import Ticker, multi
from .live import WebSocket
from .data import YfData
class Tickers:
    """A group of ``Ticker`` objects that can be downloaded and streamed together."""

    def __repr__(self):
        return f"yfinance.Tickers object <{','.join(self.symbols)}>"

    def __init__(self, tickers, session=None):
        """Build one ``Ticker`` per symbol.

        Args:
            tickers: Either a string of symbols separated by commas and/or
                whitespace, or any iterable of symbol strings (list, tuple,
                set, ...). Symbols are upper-cased.
            session: Optional session object shared by every ``Ticker`` and
                by this object's ``YfData``.
        """
        # Only strings need splitting; every other iterable is used as-is.
        if isinstance(tickers, str):
            tickers = tickers.replace(',', ' ').split()

        self.symbols = [ticker.upper() for ticker in tickers]
        self.tickers = {ticker: Ticker(ticker, session=session) for ticker in self.symbols}

        self._data = YfData(session=session)

        self._message_handler = None
        self.ws = None

    def history(self, period=None, interval="1d",
                start=None, end=None, prepost=False,
                actions=True, auto_adjust=True, repair=False,
                threads=True, group_by='column', progress=True,
                timeout=10, **kwargs):
        """Same as :meth:`download`; every argument is forwarded unchanged."""
        # Forward by keyword so the two signatures cannot drift out of step.
        return self.download(
            period=period, interval=interval,
            start=start, end=end, prepost=prepost,
            actions=actions, auto_adjust=auto_adjust, repair=repair,
            threads=threads, group_by=group_by, progress=progress,
            timeout=timeout, **kwargs)

    def download(self, period=None, interval="1d",
                 start=None, end=None, prepost=False,
                 actions=True, auto_adjust=True, repair=False,
                 threads=True, group_by='column', progress=True,
                 timeout=10, **kwargs):
        """Download price history for all symbols via ``multi.download``.

        Each ``Ticker`` in :attr:`tickers` gets its own slice of the result
        stored in ``_history``.

        Args:
            group_by: ``'column'`` (default) returns the frame with the two
                column levels swapped relative to the per-ticker layout; any
                other value returns the per-ticker layout as downloaded.
            **kwargs: Passed through to ``multi.download``, like all the
                remaining named arguments.

        Returns:
            The downloaded frame, laid out according to ``group_by``.
        """
        # Always fetch grouped by ticker so data[symbol] below is well defined;
        # the caller's group_by is applied afterwards.
        data = multi.download(self.symbols,
                              start=start, end=end,
                              actions=actions,
                              auto_adjust=auto_adjust,
                              repair=repair,
                              period=period,
                              interval=interval,
                              prepost=prepost,
                              group_by='ticker',
                              threads=threads,
                              progress=progress,
                              timeout=timeout,
                              **kwargs)

        for symbol in self.symbols:
            self.tickers[symbol]._history = data[symbol]

        if group_by == 'column':
            data.columns = data.columns.swaplevel(0, 1)
            data.sort_index(level=0, axis=1, inplace=True)

        return data

    def news(self):
        """Return ``{symbol: list of news items}`` for every symbol.

        The ``Ticker`` objects created by the constructor are reused, so the
        session supplied there is honoured.
        """
        return {symbol: list(self.tickers[symbol].news) for symbol in self.symbols}

    def live(self, message_handler=None, verbose=True):
        """Open a ``WebSocket``, subscribe to all symbols and start listening.

        Args:
            message_handler: Passed to ``WebSocket.listen``; also kept on the
                instance as ``_message_handler``.
            verbose: Passed to the ``WebSocket`` constructor.
        """
        # NOTE(review): whether listen() blocks is not visible from here — confirm in .live.
        self._message_handler = message_handler

        self.ws = WebSocket(verbose=verbose)
        self.ws.subscribe(self.symbols)
        self.ws.listen(self._message_handler)
File diff suppressed because it is too large Load Diff
@@ -0,0 +1 @@
# Package version string; the package __init__ re-exports it as `__version__`.
version = "1.1.0"