Created
July 23, 2018 10:20
-
-
Save ValRCS/1f38261e4e6169a3d90e86c8264753a2 to your computer and use it in GitHub Desktop.
Yahoo Finance web scraper for Dow Jones stock prices
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # -*- coding: utf-8 -*- | |
| from mpl_finance import candlestick_ohlc | |
| import numpy as np | |
| from matplotlib import pyplot | |
| import pandas as pd | |
| from bs4 import BeautifulSoup | |
| import urllib.request | |
| import re | |
# Global plotting setup: select the "ggplot" style sheet, then create the
# single 12.80 x 7.20 inch figure that the subplots below are placed on.
pyplot.style.use("ggplot")
pyplot.figure(figsize=(12.80, 7.20))
def _to_number(text):
    """Return *text* parsed as an int or a float, or ``None`` if it is neither.

    Thousands separators are removed before parsing, so both volumes
    ("1,234,567") and prices of 1,000 or more ("1,234.56") are recognised.
    The integer reading is tried first and only one value is ever returned,
    so a cell such as "500" can no longer be counted twice.
    """
    digits = text.replace(",", "")
    for convert in (int, float):
        try:
            return convert(digits)
        except ValueError:
            continue
    return None


def _history_values(cells):
    """Flatten the ``<td>`` cells of the history table into a list of values.

    Each cell's markup is split on ``>``, ``+`` and ``<``; the number of
    pieces tells the cell kinds apart and piece 4 holds the cell text.
    The last cell is only used as look-ahead and never emitted itself.
    """
    pieces = [re.split("[>+<]", str(cell)) for cell in cells]
    values = []
    for current, following in zip(pieces, pieces[1:]):
        if len(current) == 5:  # Length of 0 day volume
            if current[4] == "":  # Days of zero volume
                values.append(0)
        elif len(current) == 9:  # normal day length
            text = current[4]
            number = _to_number(text)
            if number is not None:  # Prices and volume
                values.append(number)
            elif len(following) < 10:  # Dates; a dividend cell has len of 17
                # A date directly followed by a dividend cell is skipped so
                # that dividend rows do not break the 7-values-per-day layout.
                values.append(text)
    return values


def getstk(code):
    """Scrape the Yahoo Finance history page of *code* into a DataFrame.

    Parameters:
        code: ticker symbol, inserted verbatim into the quote URL.

    Returns:
        A DataFrame with the columns Date, Open, High, Low, Close, Adj.Close
        and Volume, plus Dif (Adj.Close - Close) and the dividend-adjusted
        Open.Adj, High.Adj and Low.Adj.  The scraped order is reversed
        (presumably turning Yahoo's newest-first listing into oldest-first),
        rows with zero volume or missing values are dropped, and the index
        is renumbered from 0.

    Raises:
        urllib.error.URLError: if the page cannot be downloaded.
        ValueError: if the scraped values do not form complete rows of 7.
    """
    url = "https://finance.yahoo.com/quote/" + code + "/history"
    # The parser consumes the whole response while the soup is built, so the
    # connection can be closed as soon as the constructor returns.
    with urllib.request.urlopen(url) as webpage:
        soup = BeautifulSoup(webpage, "lxml")
    values = _history_values(soup.find_all("td"))

    # Every trading day contributes 7 consecutive values, in this order.
    columns = ("Date", "Open", "High", "Low", "Close", "Adj.Close", "Volume")
    stockFile = pd.DataFrame(
        {name: values[offset::7][::-1] for offset, name in enumerate(columns)}
    )

    # Adjusting the prices for dividends
    stockFile["Dif"] = stockFile["Adj.Close"] - stockFile["Close"]
    for column in ("Open", "High", "Low"):
        stockFile[column + ".Adj"] = stockFile[column] + stockFile["Dif"]

    stockFile = stockFile.query('Volume !=0')  # excluding days without movement (volume = 0)
    stockFile = stockFile.dropna()
    stockFile.index = range(len(stockFile))  # Adjusting the index if a drop is done
    return stockFile
def grafico(stk, eixo):
    """Plot the candlestick chart and the moving average level of one stock.

    The price history is fetched with ``getstk``; a short summary is printed
    and the most recent candles are drawn on *eixo*, together with a
    horizontal line at the latest 20-day moving average of the close and a
    label showing the last adjusted close.

    Parameters:
        stk: ticker symbol passed on to ``getstk``.
        eixo: the matplotlib axes that receives every artist of this chart.

    Returns:
        The date of the most recent row, as scraped.

    Raises:
        ValueError: if no price history could be scraped for *stk*.
    """
    stock = getstk(stk)
    if len(stock) == 0:
        raise ValueError("no price history scraped for {!r}".format(stk))

    dia = stock["Date"].iloc[-1]
    print ()
    print ("Stock : ", stk)
    print ("Last date : ", dia)
    print ("Last price : ", stock["Close"].iloc[-1])

    # Moving average calculation, 20-day period; NaN with fewer than 20 rows
    mm_last = float(stock["Close"].rolling(window=20).mean().iloc[-1])
    print ("Moving average : ", round(mm_last, 2))

    prd = 12  # day's window for each stock
    # Slicing from the end copes with histories shorter than the window:
    # it simply yields every available row instead of raising IndexError.
    recent = stock.iloc[-(prd + 1):]
    # Candlestick list: (x position, open, high, low, close), x starting at 1
    OHLC = [
        (dt, opn, high, low, close)
        for dt, (opn, high, low, close) in enumerate(
            zip(recent["Open.Adj"], recent["High.Adj"],
                recent["Low.Adj"], recent["Adj.Close"]),
            start=1,
        )
    ]
    last_x, last_close = OHLC[-1][0], OHLC[-1][4]

    # Axis for drawing the last day's moving average level across all candles
    eixoX = np.linspace(1, last_x, num=last_x)
    eixoY = [mm_last] * last_x

    bboxP = dict(boxstyle="round,pad=0.1", fc="white", ec="black", lw=0.5, alpha=0.35)
    candlestick_ohlc(eixo, OHLC, width=0.7, colorup='green', colordown='red')
    # Draw through the axes object itself rather than pyplot's implicit
    # "current axes", so every artist lands on the subplot the caller chose.
    eixo.plot(eixoX, eixoY, color="darkorange", alpha=0.5)  # last day moving average level
    eixo.text(last_x, last_close, str(round(last_close, 2)),
              ha="center", va="center", size=5.4, bbox=bboxP)  # last price label
    eixo.set_title(stk, fontsize=7.6)
    eixo.set_xticklabels([])
    eixo.set_yticklabels([])
    return dia
# Stock list for Dow Jones Ind. Average
lista = [
    'mmm', 'axp', 'aapl', 'ba', 'cat', 'cvx', 'csco', 'ko', 'dis', 'dwdp',
    'xom', 'gs', 'hd', 'ibm', 'intc', 'jnj', 'jpm', 'mcd', 'mrk', 'msft',
    'nke', 'pfe', 'pg', 'trv', 'utx', 'unh', 'vz', 'v', 'wmt', 'wba',
]
grid = (3, 10)  # 3 lines 10 columns

# Enumerate the grid cells row by row; the n-th cell shows the n-th ticker.
cells = [(line, column) for line in range(grid[0]) for column in range(grid[1])]
for ct, (l, c) in enumerate(cells):
    eixo = pyplot.subplot2grid(grid, (l, c), colspan=1, rowspan=1)
    dia = grafico(lista[ct], eixo)

# The overall title carries the date returned for the last stock plotted.
tt = "Dow Jones Industrial Average - " + dia
pyplot.suptitle(tt, fontsize=7)
# minor chart windows adjustments
pyplot.subplots_adjust(
    left=0.03, right=0.97, top=0.93, bottom=0.03, hspace=0.19, wspace=0.14
)
pyplot.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment