"""A minimal version of https://github.com/PatzEdi/Stockstir/.""" import re from dataclasses import dataclass import httpx @dataclass class Provider: base_url: str regex: str # this a nice idea def ticker_url(self, ticker: str) -> str: return self.base_url + ticker def get_quote(self, ticker: str) -> float: source = get_page(self.ticker_url(ticker)) return gather_price(source, self.regex) def gather_price(source: str, pattern: str) -> float: # this can be refoctored in due course # important that you treat gather_price() as a function that just needs # source text and a pattern to run, you do not pass any other information inside prices = re.findall(pattern, source) counter = 0 for i in range(len(prices)): # why not just enumerate()? try: # do you want the last or first valid counter? # if you want the first - you can just return here float(prices[i]) break except: # E722 Do not use bare `except` counter += 1 return float(prices[counter].replace(",", "")) def get_page(url): # will need to randomize headers headers = { "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:49.0) Gecko/20100101 Firefox/49.0" } return httpx.get(url, headers=headers).text def get_quote(ticker: str, provider="cnbc"): return providers[provider].get_quote(ticker) providers = { "cnbc": Provider( "https://www.cnbc.com/quotes/", r'(?<="price":")(.*)(?=","priceChange":")' ), "bi": Provider( "https://markets.businessinsider.com/stocks/", r'"currentValue":(.*?),"previousClose":', ), "zacks": Provider( "https://www.zacks.com/stock/quote/", r'last_price">\$(.*?)' ), } # this goes to unit test assert providers["cnbc"].ticker_url("MSFT") == "https://www.cnbc.com/quotes/MSFT" # this is a sample run for the program print(get_quote("MSFT")) # 428.74