Skip to content

Instantly share code, notes, and snippets.

@ValRCS
Created July 23, 2018 10:20
Show Gist options
  • Select an option

  • Save ValRCS/1f38261e4e6169a3d90e86c8264753a2 to your computer and use it in GitHub Desktop.

Select an option

Save ValRCS/1f38261e4e6169a3d90e86c8264753a2 to your computer and use it in GitHub Desktop.
Yahoo Finance web scraper for Dow Jones stock prices
# -*- coding: utf-8 -*-
from mpl_finance import candlestick_ohlc
import numpy as np
from matplotlib import pyplot
import pandas as pd
from bs4 import BeautifulSoup
import urllib.request
import re
# Select the "ggplot" theme and open the figure (size given in inches) that
# the plotting code further down draws into.
fig_width, fig_height = 12.80, 7.20
pyplot.style.use("ggplot")
pyplot.figure(figsize=(fig_width, fig_height))
def _parse_cell_text(text):
    """Turn the visible text of a table cell into a number when possible.

    Thousands separators are stripped first, so "1,234,567" and "1,234.56"
    both parse. Each text maps to exactly one value, which keeps the caller's
    7-cells-per-row layout intact.

    Args:
        text: The text found inside a table cell.

    Returns:
        An ``int`` for integer text, a ``float`` for decimal text, or ``None``
        when the text is not numeric (e.g. a date).
    """
    cleaned = text.replace(",", "")
    try:
        return int(cleaned)
    except ValueError:
        pass
    try:
        return float(cleaned)
    except ValueError:
        return None


def getstk(code):
    """Scrape the Yahoo Finance history page of a stock into a DataFrame.

    Args:
        code: Ticker symbol, inserted into the quote URL.

    Returns:
        A DataFrame in reversed page order with the columns Date, Open, High,
        Low, Close, Adj.Close and Volume, plus Dif (Adj.Close - Close) and the
        shifted prices Open.Adj, High.Adj and Low.Adj. Rows with zero volume
        or missing values are removed and the index is renumbered from 0.

    Raises:
        ValueError: Raised by pandas if the scraped cells do not form
            complete 7-cell rows.
    """
    url = "https://finance.yahoo.com/quote/"+code+"/history"
    # Close the HTTP response as soon as the page has been parsed.
    with urllib.request.urlopen(url) as webpage:
        soup = BeautifulSoup(webpage, "lxml")

    # Split every cell's markup once; each cell is looked at twice below
    # (as the current cell and as the lookahead of the previous one).
    split_cells = [re.split("[>+<]", str(td)) for td in soup.find_all("td")]

    tf = []
    # The last cell is only ever used as lookahead, never parsed itself.
    for parts, next_parts in zip(split_cells, split_cells[1:]):
        if len(parts) == 5:  # Length of a zero-volume day cell
            if parts[4] == "":
                tf.append(0)
        elif len(parts) == 9:  # Normal cell: the text sits in parts[4]
            value = _parse_cell_text(parts[4])
            if value is not None:  # Price or volume
                tf.append(value)
            elif len(next_parts) < 10:
                # Date cell. Skipped when the next cell is longer: per the
                # original author's note a dividend row's cell has 17 parts.
                tf.append(parts[4])

    # The flat list holds 7 consecutive values per row; every 7th value
    # starting at a column's offset is that column, reversed from page order.
    columns = ("Date", "Open", "High", "Low", "Close", "Adj.Close", "Volume")
    stockFile = pd.DataFrame()
    for offset, name in enumerate(columns):
        stockFile[name] = tf[offset::7][::-1]

    # Adjusting the prices for dividends
    stockFile["Dif"] = stockFile["Adj.Close"] - stockFile["Close"]
    stockFile["Open.Adj"] = stockFile["Open"] + stockFile["Dif"]
    stockFile["High.Adj"] = stockFile["High"] + stockFile["Dif"]
    stockFile["Low.Adj"] = stockFile["Low"] + stockFile["Dif"]

    stockFile = stockFile.query('Volume !=0')  # Exclude days without movement (volume = 0)
    stockFile = stockFile.dropna()
    # Renumber the rows in case any were dropped.
    return stockFile.reset_index(drop=True)
def grafico(stk, eixo):
    """Plot the candlestick chart and the moving-average level of one stock.

    Fetches the price history with ``getstk``, prints a short summary and
    draws the most recent candles, the latest 20-day moving average of the
    close (as a horizontal line) and a label with the last adjusted close.
    Everything is drawn on ``eixo`` itself, so the result does not depend on
    which axes pyplot currently considers active.

    Args:
        stk: Ticker symbol; also used as the subplot title.
        eixo: Matplotlib axes to draw on.

    Returns:
        The value of the "Date" column in the last row of the history.

    Raises:
        ValueError: If no price rows are available for ``stk``.
    """
    stock = getstk(stk)
    if stock.empty:
        raise ValueError("No price history available for " + stk)

    dia = stock["Date"].iloc[-1]
    print ()
    print ("Stock : ", stk)
    print ("Last date : ", dia)
    print ("Last price : ", stock["Close"].iloc[-1])

    # Moving average calculation (20-day period); only the latest value is
    # used. It is NaN when fewer than 20 rows exist.
    mm_last = float(stock["Close"].rolling(window = 20).mean().iloc[-1])
    print ("Moving average : ", round(mm_last,2))

    prd = 12 # day's window for each stock
    # Axis for drawing the last day's moving average level
    eixoX = np.linspace(1, prd+1, num = prd+1)
    eixoY = [mm_last] * (prd+1)

    # Candlestick list for the last prd+1 rows, positioned at x = 1, 2, ...
    # tail() never reaches before the first row, so a short history yields
    # fewer candles instead of wrapped-around negative indices.
    recent = stock.tail(prd + 1)
    OHLC = list(zip(range(1, len(recent) + 1),
                    recent['Open.Adj'], recent['High.Adj'],
                    recent['Low.Adj'], recent['Adj.Close']))

    bboxP = dict(boxstyle = "round,pad=0.1",fc="white",ec = "black",lw = 0.5, alpha = 0.35)
    candlestick_ohlc(eixo, OHLC, width=0.7, colorup = 'green', colordown = 'red')
    eixo.plot(eixoX, eixoY, color = "darkorange", alpha = 0.5) # last day moving average level
    # Last price label, centred on the final candle.
    ultimo = OHLC[-1][4]
    eixo.text(len(OHLC), ultimo, str(round(ultimo,2)), ha = "center", va = "center", size = 5.4, bbox = bboxP)
    eixo.set_title(stk, fontsize = 7.6)
    eixo.set_xticklabels([])
    eixo.set_yticklabels([])
    return dia
# Ticker symbols charted under the "Dow Jones Industrial Average" title
lista = [
    'mmm', 'axp', 'aapl', 'ba', 'cat', 'cvx', 'csco', 'ko', 'dis', 'dwdp',
    'xom', 'gs', 'hd', 'ibm', 'intc', 'jnj', 'jpm', 'mcd', 'mrk', 'msft',
    'nke', 'pfe', 'pg', 'trv', 'utx', 'unh', 'vz', 'v', 'wmt', 'wba',
]
grid = (3, 10)  # 3 rows by 10 columns of small charts

# Fill the grid row by row, one ticker per cell; `dia` ends up holding the
# date returned for the last ticker drawn.
for slot in range(grid[0] * grid[1]):
    row, col = divmod(slot, grid[1])
    eixo = pyplot.subplot2grid(grid, (row, col), colspan=1, rowspan=1)
    dia = grafico(lista[slot], eixo)

pyplot.suptitle("Dow Jones Industrial Average - " + dia, fontsize=7)
# Minor adjustments to the margins and the spacing between the small charts
margins = dict(left=0.03, right=0.97, top=0.93, bottom=0.03,
               hspace=0.19, wspace=0.14)
pyplot.subplots_adjust(**margins)
pyplot.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment