"""Crawl the NDX quote from Yahoo Finance and log it to a daily CSV file.

The script opens the quote page in headless Chrome, reads the displayed price
every ``interval`` seconds and, while the market is open, appends one
``symbol, price, timestamp`` row per poll to ``<symbol>-<year>-<month>-<day>.csv``.
It stops by itself once the market has closed.
"""
# importing libraries / dependencies
import csv
import datetime
import os
import time

interval = 2.0            # interval for crawler in seconds
symbol = "NDX"            # symbol that is used in CSV file
market_open_hour = 14     # hour when the market opens (in UTC time)
market_open_minute = 30   # minute when the market opens
market_close_hour = 21    # hour when the market closes (in UTC time)
# NOTE(review): the opening/closing hours are fixed UTC values; if the exchange
# follows daylight-saving time they may need adjusting -- confirm.

QUOTE_URL = "https://finance.yahoo.com/quote/%5ENDX?p=&guccounter=1"
PRICE_XPATH = '//div[@id="quote-market-notice"]/preceding-sibling::span[2]'


def market_phase(timestamp):
    """Classify *timestamp* relative to the configured trading session.

    Args:
        timestamp: a ``datetime`` expressed in UTC (only ``hour`` and
            ``minute`` are read).

    Returns:
        ``"pre"`` before ``market_open_hour:market_open_minute``,
        ``"open"`` from the opening time up to (but excluding)
        ``market_close_hour:00``, and ``"closed"`` from then on.
    """
    # Compare hour and minute together as minutes since midnight. Testing the
    # minute on its own wrongly treated the first `market_open_minute` minutes
    # of every later hour (15:00-15:29, 16:00-16:29, ...) as "not open".
    minutes_now = timestamp.hour * 60 + timestamp.minute
    if minutes_now < market_open_hour * 60 + market_open_minute:
        return "pre"
    if timestamp.hour < market_close_hour:
        return "open"
    # `>=` close hour: the closing hour itself already counts as closed.
    return "closed"


def main():
    """Run the crawler until the market closes, then shut the browser down."""
    # Third-party imports live here so that the pure helper above can be
    # imported (and unit-tested) on machines without the browser stack.
    import pytz
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options
    from selenium.webdriver.common.by import By

    starttime = time.time()         # get the current time (for interval calculation)
    date = datetime.datetime.now()  # get the current date (for filename)

    # specify options for google chrome in headless mode (runs without GUI)
    chrome_options = Options()
    chrome_options.add_argument("--headless")

    # run the google chrome driver
    # NOTE(review): `executable_path` / `chrome_options` are deprecated keyword
    # arguments in Selenium 4; kept so the script still runs on Selenium 3.
    driver = webdriver.Chrome(executable_path=os.path.abspath("chromedriver"),
                              chrome_options=chrome_options)
    try:
        driver.get(QUOTE_URL)

        # define the filename of the csv file (e.g. NDX-2020-11-26.csv)
        csvfile = "{}-{}-{}-{}.csv".format(symbol, date.year, date.month, date.day)

        # newline="" lets the csv module control line endings itself
        # (otherwise blank lines appear between rows on Windows).
        with open(csvfile, "w", newline="") as file:
            writer = csv.writer(file)
            writer.writerow(["symbol", "price", "timestamp"])  # write first line in csv file

            while True:
                # find the element by its xpath on the website
                price = driver.find_element(By.XPATH, PRICE_XPATH).text
                # get the current UTC timestamp
                timestamp = datetime.datetime.now(pytz.utc)
                phase = market_phase(timestamp)

                # only store it, if market is open
                if phase == "open":
                    print("symbol: " + symbol + ",price: " + price + ", timestamp: " + str(timestamp))
                    # write it to CSV
                    writer.writerow([symbol, price, timestamp])
                elif phase == "closed":
                    print("market closed, ", str(timestamp))
                    # stop crawling
                    break
                else:
                    print("market not opened yet, ", str(timestamp))

                # repeat in the given interval; sleeping until the next multiple
                # of `interval` since start keeps the polling from drifting
                time.sleep(interval - ((time.time() - starttime) % interval))
    finally:
        # quit() (not close()) also terminates the chromedriver process, and
        # the finally clause guarantees that even when a lookup raises.
        driver.quit()


if __name__ == "__main__":
    main()