Created
November 27, 2020 11:17
-
-
Save miczed/a6204c86ca3338916b9ed2973425e3f0 to your computer and use it in GitHub Desktop.
Revisions
-
miczed created this gist
Nov 27, 2020. There are no files selected for viewing
"""Record the NASDAQ-100 (NDX) quote shown on Yahoo Finance into a CSV file.

A headless Chrome session polls the quote page every ``interval`` seconds.
While the market is open, each reading is printed and appended to
``<symbol>-<year>-<month>-<day>.csv``; once the market has closed the crawler
stops and shuts the browser down.
"""

# importing libraries / dependencies
import csv
import datetime
import os
import time

interval = 2.0  # interval for crawler in seconds
symbol = "NDX"  # symbol that is used in CSV file
# NOTE: 14:30-21:00 UTC matches the regular US session only while New York is
# on standard time; adjust these values during daylight saving time.
market_open_hour = 14  # hour when the market opens (in UTC time)
market_open_minute = 30  # minute when the market opens
market_close_hour = 21  # hour when the market closes (in UTC time)

quote_url = "https://finance.yahoo.com/quote/%5ENDX?p=&guccounter=1"
price_xpath = '//div[@id="quote-market-notice"]/preceding-sibling::span[2]'


def market_status(timestamp):
    """Classify *timestamp* relative to the configured trading session.

    Args:
        timestamp: A ``datetime.datetime`` expressed in UTC.

    Returns:
        ``"not_open"`` before the opening time, ``"open"`` from the opening
        time up to (but not including) the closing time, and ``"closed"``
        from the closing time onwards.
    """
    opens_at = datetime.time(market_open_hour, market_open_minute)
    closes_at = datetime.time(market_close_hour)
    # Compare the full time of day. Testing hour and minute independently
    # would wrongly reject e.g. 15:10 because 10 < market_open_minute.
    now = timestamp.time()
    if now < opens_at:
        return "not_open"
    if now < closes_at:
        return "open"
    return "closed"


def main():
    """Run the crawler until the market closes."""
    # Imported here so that market_status() stays importable on machines
    # that do not have Selenium installed.
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By

    starttime = time.time()  # get the current time (for interval calculation)
    date = datetime.datetime.now()  # get the current date (for filename)

    # specify options for google chrome in headless mode (runs without GUI)
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    # run the google chrome driver
    # NOTE(review): `executable_path` is kept for Selenium 3 compatibility;
    # newer Selenium releases expect a Service object instead - confirm the
    # installed version.
    driver = webdriver.Chrome(
        executable_path=os.path.abspath("chromedriver"),
        options=chrome_options,
    )
    try:
        driver.get(quote_url)

        # define the filename of the csv file (e.g. NDX-2020-11-26.csv)
        csvfile = "{}-{}-{}-{}.csv".format(symbol, date.year, date.month, date.day)

        # newline="" is what the csv module expects; without it Windows
        # output gets an empty line after every row.
        with open(csvfile, "w", newline="") as file:
            writer = csv.writer(file)
            writer.writerow(["symbol", "price", "timestamp"])  # write first line in csv file

            while True:
                # get the current UTC timestamp
                timestamp = datetime.datetime.now(datetime.timezone.utc)
                status = market_status(timestamp)

                if status == "closed":
                    print("market closed, ", str(timestamp))
                    # stop crawling
                    break

                if status == "open":
                    # find the element by its xpath on the website
                    price = driver.find_element(By.XPATH, price_xpath).text
                    print("symbol: " + symbol + ",price: " + price + ", timestamp: " + str(timestamp))
                    # write it to CSV
                    writer.writerow([symbol, price, timestamp])
                else:
                    print("market not opened yet, ", str(timestamp))

                # repeat in the given interval; sleeping until the next
                # multiple of `interval` since start avoids cumulative drift
                time.sleep(interval - ((time.time() - starttime) % interval))
    finally:
        # quit() ends the whole browser session (close() only closes the
        # window), and `finally` guarantees it runs on errors and Ctrl-C too.
        driver.quit()


if __name__ == "__main__":
    main()