Skip to content

Instantly share code, notes, and snippets.

@lorey
Last active November 5, 2025 01:24
Show Gist options
  • Save lorey/079c5e178c9c9d3c30ad87df7f70491d to your computer and use it in GitHub Desktop.
Save lorey/079c5e178c9c9d3c30ad87df7f70491d to your computer and use it in GitHub Desktop.

Revisions

  1. lorey revised this gist Nov 3, 2020. 1 changed file with 5 additions and 0 deletions.
    5 changes: 5 additions & 0 deletions selenium_xhr_requests_via_performance_logging.py
    Original file line number Diff line number Diff line change
    @@ -4,6 +4,11 @@
    # for example on many JS-intensive/React-based websites
    #

    import json
    from time import sleep

    from selenium import webdriver
    from selenium.common.exceptions import WebDriverException
    from selenium.webdriver import DesiredCapabilities

    # make chrome log requests
    # Work on a copy: DesiredCapabilities.CHROME is a shared class-level dict,
    # so mutating it directly would leak into every other driver in the process.
    capabilities = DesiredCapabilities.CHROME.copy()
    # Use the vendor-prefixed key (formerly plain "loggingPrefs").
    capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
  2. lorey created this gist Nov 3, 2020.
    34 changes: 34 additions & 0 deletions selenium_xhr_requests_via_performance_logging.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,34 @@
    #
    # This small example shows how to capture the requests made by a page's JavaScript (XHR/fetch) via Selenium.
    # This gives you access to the raw data for scraping,
    # for example on many JS-intensive/React-based websites
    #

    # make chrome log requests
    # Work on a copy: DesiredCapabilities.CHROME is a shared class-level dict,
    # so mutating it directly would leak into every other driver in the process.
    capabilities = DesiredCapabilities.CHROME.copy()
    # Use the vendor-prefixed key (formerly plain "loggingPrefs").
    capabilities["goog:loggingPrefs"] = {"performance": "ALL"}
    # NOTE(review): `desired_capabilities` / `executable_path` are the keyword
    # arguments this script was written against; newer Selenium releases may
    # expect Options/Service objects instead - confirm the installed version.
    driver = webdriver.Chrome(
        desired_capabilities=capabilities, executable_path="./chromedriver"
    )

    # fetch a site that does xhr requests
    driver.get("https://sitewithajaxorsomething.com")
    sleep(5)  # wait for the requests to take place

    # extract requests from logs
    # Each performance-log entry is a dict whose "message" field holds a JSON
    # string; the decoded object in turn nests the actual event under "message".
    # (Requires `import json` at the top of the file.)
    logs_raw = driver.get_log("performance")
    logs = [json.loads(entry["message"])["message"] for entry in logs_raw]

    def log_filter(log_):
        """Return True if *log_* is a received network response with a JSON MIME type.

        Args:
            log_: A decoded performance-log event (a dict with "method" and,
                for network events, "params").

        Returns:
            True only when the event's method is "Network.responseReceived" and
            the response's "mimeType" contains "json"; False otherwise,
            including when any of the expected keys are missing.
        """
        # is an actual response
        if log_.get("method") != "Network.responseReceived":
            return False
        # and json; tolerate events that lack params/response/mimeType
        response = log_.get("params", {}).get("response", {})
        return "json" in (response.get("mimeType") or "")

    # Print the URL and body of every JSON response that was captured.
    try:
        for log in filter(log_filter, logs):
            request_id = log["params"]["requestId"]
            resp_url = log["params"]["response"]["url"]
            print(f"Caught {resp_url}")
            try:
                body = driver.execute_cdp_cmd(
                    "Network.getResponseBody", {"requestId": request_id}
                )
            except WebDriverException as exc:
                # A single failed body lookup should not abort the remaining
                # responses; report it and move on.
                print(f"Could not fetch body for {resp_url}: {exc}")
                continue
            print(body)
    finally:
        # Always shut the browser down so no Chrome/chromedriver process
        # is left running after the script ends.
        driver.quit()