Source code for finagg.indices.api

"""Indices API (symbols in popular indices)."""

import io
import os
from abc import ABC, abstractmethod
from datetime import timedelta
from functools import cache
from typing import ClassVar

import pandas as pd
import requests
import requests_cache
from bs4 import BeautifulSoup

from .. import backend

# Module-wide HTTP session shared by every index API in this file.
# Responses are cached under the name/path given by
# ``backend.http_cache_path`` and expire one week after being stored.
session = requests_cache.CachedSession(
    str(backend.http_cache_path),
    expire_after=timedelta(weeks=1),
)


[docs]class API(ABC): """Abstract indices API.""" #: Request API URL. url: ClassVar[str]
[docs] @classmethod @abstractmethod def get(cls, *, user_agent: None | str = None) -> pd.DataFrame: """Main dataset API method."""
[docs] @classmethod def get_ticker_list(cls, *, user_agent: None | str = None) -> list[str]: """List the tickers in the index.""" df = cls.get(user_agent=user_agent) return df["ticker"].tolist()
class DJIA(API):
    """Get data on all companies within the DJIA.

    The module variable :data:`finagg.indices.api.djia` is an instance of this
    API implementation and is the most popular interface for querying this API.

    """

    url = "https://en.wikipedia.org/wiki/Dow_Jones_Industrial_Average"

    @classmethod
    def get(cls, *, user_agent: None | str = None) -> pd.DataFrame:
        """Get a dataframe containing data on the tickers in the DJIA.

        The first ``wikitable`` on the page at :attr:`url` is parsed,
        its columns are renamed to snake-case names, and the index
        weighting is converted from a percent string to a fraction.

        Args:
            user_agent: Optional user agent header value to send with
                the request.

        Returns:
            A dataframe with one row per table entry. The ``"weight"``
            column holds floats (e.g. ``"2.41%"`` becomes ``0.0241``).

        Examples:
            >>> finagg.indices.api.djia.get().head(5)  # doctest: +SKIP
                        company  exchange  ticker                industry       added  weight
            0                3M      NYSE     MMM            Conglomerate  1976-08-09  0.0241
            1  American Express      NYSE     AXP      Financial services  1982-08-30  0.0302
            2             Amgen    NASDAQ    AMGN       Biopharmaceutical  2020-08-31  0.0548
            3             Apple    NASDAQ    AAPL  Information technology  2015-03-19  0.0284
            4            Boeing      NYSE      BA   Aerospace and defense  1987-03-12  0.0336

        """
        response = _get(cls.url, user_agent=user_agent)
        soup = BeautifulSoup(response.text, "html.parser")
        tbl = soup.find("table", {"class": "wikitable"})
        # Exactly one table is expected in the extracted HTML fragment;
        # the tuple-unpacking raises otherwise.
        (df,) = pd.read_html(io.StringIO(str(tbl)))

        def _percent_to_fraction(item: str) -> float:
            """Convert a percent string such as ``"2.41%"`` to ``0.0241``."""
            value, _ = item.split("%")
            return float(value) / 100

        # "Notes" carries no data we expose. Tolerate its absence (as
        # ``SP500.get`` does for its own optional column) so a change in
        # the upstream table layout doesn't raise ``KeyError``.
        df = df.drop(columns="Notes", errors="ignore")
        df = df.rename(
            columns={
                "Company": "company",
                "Exchange": "exchange",
                "Symbol": "ticker",
                "Industry": "industry",
                "Date added": "added",
                "Index weighting": "weight",
            }
        )
        df["weight"] = df["weight"].apply(_percent_to_fraction)
        return df
class Nasdaq100(API):
    """Get data on all companies within the Nasdaq 100.

    The module variable :data:`finagg.indices.api.nasdaq100` is an instance of
    this API implementation and is the most popular interface for querying this
    API.

    """

    url = "https://en.wikipedia.org/wiki/Nasdaq-100"

    @classmethod
    def get(cls, *, user_agent: None | str = None) -> pd.DataFrame:
        """Get a dataframe containing data on the tickers in the Nasdaq 100.

        The fourth ``wikitable`` on the page at :attr:`url` is parsed
        and its columns are renamed to snake-case names.

        Args:
            user_agent: Optional user agent header value to send with
                the request.

        Returns:
            A dataframe with one row per table entry.

        Examples:
            >>> finagg.indices.api.nasdaq100.get().head(5)  # doctest: +SKIP
                           company  ticker                industry                           sub_industry
            0  Activision Blizzard    ATVI  Communication Services         Interactive Home Entertainment
            1           Adobe Inc.    ADBE  Information Technology                   Application Software
            2                  ADP     ADP  Information Technology  Data Processing & Outsourced Services
            3               Airbnb    ABNB  Consumer Discretionary     Internet & Direct Marketing Retail
            4     Align Technology    ALGN             Health Care                   Health Care Supplies

        """
        column_names = {
            "Company": "company",
            "Symbol": "ticker",
            "GICS Sector": "industry",
            "GICS Sub-Industry": "sub_industry",
        }
        page = _get(cls.url, user_agent=user_agent)
        parsed = BeautifulSoup(page.text, "html.parser")
        wikitables = parsed.find_all("table", {"class": "wikitable"})
        # The constituents table is taken to be the fourth wikitable on
        # the page (index 3).
        constituents_html = io.StringIO(str(wikitables[3]))
        # Exactly one table is expected in the fragment.
        (frame,) = pd.read_html(constituents_html)
        return pd.DataFrame(frame).rename(columns=column_names)
class SP500(API):
    """Get data on all companies within the S&P 500.

    The module variable :data:`finagg.indices.api.sp500` is an instance of
    this API implementation and is the most popular interface for querying this
    API.

    """

    url = "https://en.wikipedia.org/wiki/List_of_S%26P_500_companies"

    @classmethod
    def get(cls, *, user_agent: None | str = None) -> pd.DataFrame:
        """Get a dataframe containing data on the tickers in the S&P 500.

        The first ``wikitable`` on the page at :attr:`url` is parsed,
        the ``"SEC filings"`` column is dropped when present, and the
        remaining columns are renamed to snake-case names.

        Args:
            user_agent: Optional user agent header value to send with
                the request.

        Returns:
            A dataframe with one row per table entry.

        Examples:
            >>> finagg.indices.api.sp500.get().head(5)  # doctest: +SKIP
               ticker      company                industry  ...
            0     MMM           3M             Industrials  ...
            1     AOS  A. O. Smith             Industrials  ...
            2     ABT       Abbott             Health Care  ...
            3    ABBV       AbbVie             Health Care  ...
            4     ACN    Accenture  Information Technology  ...

        """
        column_names = {
            "Symbol": "ticker",
            "Security": "company",
            "GICS Sector": "industry",
            "GICS Sub-Industry": "sub_industry",
            "Headquarters Location": "headquarters",
            "Date first added": "added",
            "CIK": "cik",
            "Founded": "founded",
        }
        page = _get(cls.url, user_agent=user_agent)
        parsed = BeautifulSoup(page.text, "html.parser")
        first_table = parsed.find("table", {"class": "wikitable"})
        # Exactly one table is expected in the fragment.
        (frame,) = pd.read_html(io.StringIO(str(first_table)))
        # The column is optional, hence ``errors="ignore"``.
        trimmed = pd.DataFrame(frame).drop(columns="SEC filings", errors="ignore")
        return trimmed.rename(columns=column_names)
djia = DJIA()
"""The most popular way for accessing the :class:`DJIA` API implementation.

:meta hide-value:
"""

nasdaq100 = Nasdaq100()
"""The most popular way for accessing the :class:`Nasdaq100` API implementation.

:meta hide-value:
"""

sp500 = SP500()
"""The most popular way for accessing the :class:`SP500` API implementation.

:meta hide-value:
"""


def _get(
    url: str,
    /,
    *,
    user_agent: None | str = None,
    timeout: None | float | tuple[float, float] = 30.0,
) -> requests.Response:
    """Indices API request helper.

    The user agent is resolved in this order: the ``user_agent``
    argument, the ``INDICES_API_USER_AGENT`` environment variable, and
    finally a built-in browser-like default.

    Args:
        url: Complete URL to get from.
        user_agent: User agent header declaration sent with the request.
        timeout: Seconds to wait for the server (or a
            ``(connect, read)`` pair) before giving up. ``None``
            disables the timeout and may block indefinitely.

    Returns:
        Successful responses.

    Raises:
        `RuntimeError`: If the resolved user agent is empty, e.g. when
            ``INDICES_API_USER_AGENT`` is set to an empty string and no
            ``user_agent`` argument is given.
        `requests.HTTPError`: If the response has an error status code.

    """
    user_agent = user_agent or os.environ.get(
        "INDICES_API_USER_AGENT",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/39.0.2171.95 Safari/537.36",
    )
    if not user_agent:
        raise RuntimeError(
            "No indices API user agent declaration found. "
            "Pass your user agent declaration to the API directly, or "
            "set the `INDICES_API_USER_AGENT` environment variable."
        )
    # Without an explicit timeout a stalled connection would hang the
    # caller forever; requests applies no timeout by default.
    response = session.get(
        url,
        headers={"User-Agent": user_agent},
        timeout=timeout,
    )
    response.raise_for_status()
    return response
@cache
def get_ticker_set(*, user_agent: None | str = None) -> set[str]:
    """Get the set of tickers from all the popular indices.

    The tickers of the DJIA, Nasdaq 100 and S&P 500 are fetched in that
    order and merged. The result is memoized per ``user_agent`` value,
    so repeated calls with the same argument return the same set object.

    Args:
        user_agent: Optional user agent header value forwarded to each
            index API.

    Returns:
        The union of the tickers of all three indices.

    Examples:
        >>> "AAPL" in finagg.indices.api.get_ticker_set()  # doctest: +SKIP
        True

    """
    combined: set[str] = set()
    for index_api in (djia, nasdaq100, sp500):
        combined.update(index_api.get_ticker_list(user_agent=user_agent))
    return combined