Skip to content

Instantly share code, notes, and snippets.

@parth-paradkar
Created April 2, 2021 09:47
Show Gist options
  • Select an option

  • Save parth-paradkar/53625db1d1ec4fdd8bb1c642dfc00ad9 to your computer and use it in GitHub Desktop.

Select an option

Save parth-paradkar/53625db1d1ec4fdd8bb1c642dfc00ad9 to your computer and use it in GitHub Desktop.
Scrape Facebook page for posts
import os
import time
from os import getenv

from selenium.common.exceptions import WebDriverException
from selenium.webdriver import Firefox
from selenium.webdriver.common.by import By
def refresh_feed_elements(driver: Firefox):
    """Re-query the page for the main feed container and its posts.

    Args:
        driver: Browser session whose current page is searched.

    Returns:
        A ``(main_feed, posts)`` tuple: the second ``div[@role='feed']``
        element on the page, and the list of its direct ``div`` children.

    Raises:
        IndexError: If the page has fewer than two ``div[@role='feed']``
            elements.
    """
    feeds = driver.find_elements(By.XPATH, ".//div[@role='feed']")
    # The second match is used as the main feed; presumably the first one is
    # a different feed region on the page -- verify against the live markup.
    if len(feeds) < 2:
        raise IndexError(
            "expected at least 2 div[@role='feed'] elements, "
            f"found {len(feeds)}"
        )
    main_feed = feeds[1]
    posts = main_feed.find_elements(By.XPATH, "./div")
    return main_feed, posts
def login(driver: Firefox, email: str, password: str) -> None:
    """Open the Facebook home page and submit the login form.

    Args:
        driver: Browser session to drive.
        email: Value typed into the field with id ``email``.
        password: Value typed into the field with id ``pass``.
    """
    driver.get("https://www.facebook.com/")
    # find_element(By..., ...) is used instead of the find_element_by_*
    # helpers, which newer Selenium releases no longer provide.
    driver.find_element(By.ID, "email").send_keys(email)
    driver.find_element(By.ID, "pass").send_keys(password)
    driver.find_element(By.NAME, "login").click()
def get_page(driver: Firefox, page: str, wait_seconds: float = 3) -> None:
    """Navigate the browser to ``page`` and pause afterwards.

    Args:
        driver: Browser session used for the navigation.
        page: URL to open.
        wait_seconds: Seconds to sleep after the navigation call returns.
            Defaults to 3.
    """
    driver.get(page)
    # Fixed pause rather than an explicit wait condition; presumably this
    # gives dynamically loaded content time to render.
    time.sleep(wait_seconds)
def scrape_page(page: str) -> None:
    """Log in to Facebook, open ``page`` and save every post in its feed.

    Credentials are read from the ``FB_EMAIL`` and ``FB_PASSWORD``
    environment variables. The feed is scrolled to the bottom repeatedly;
    scraping stops once a scroll no longer changes the number of posts.
    Each post's text is handed to ``process_post``.

    Args:
        page: URL of the Facebook page to scrape.

    Raises:
        RuntimeError: If either credential environment variable is unset
            or empty.
    """
    email = getenv("FB_EMAIL")
    password = getenv("FB_PASSWORD")
    # Fail before launching a browser instead of erroring inside send_keys.
    if not email or not password:
        raise RuntimeError(
            "FB_EMAIL and FB_PASSWORD environment variables must be set"
        )

    see_more_xpath = ".//div[text()[contains(., 'See More')]]"
    driver = Firefox()
    try:
        login(driver, email, password)
        get_page(driver, page)
        main_feed, posts = refresh_feed_elements(driver)
        num_processed_posts = 0
        while len(posts) != num_processed_posts:
            for i in range(num_processed_posts, len(posts)):
                current_post = posts[i]
                try:
                    # Expand truncated posts so .text holds the full body.
                    current_post.find_element(By.XPATH, see_more_xpath).click()
                except WebDriverException:
                    # Best effort: a post without a clickable "See More"
                    # element is processed as-is. Only Selenium errors are
                    # caught so Ctrl-C still stops the scrape.
                    print("Exception! Post number:", i)
                process_post(current_post.text)
                num_processed_posts += 1
                # Re-query after each post; presumably the click can
                # re-render the feed and invalidate earlier references.
                posts = main_feed.find_elements(By.XPATH, "./div")
            # Scroll to the bottom to trigger loading of further posts.
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(3)
            main_feed, posts = refresh_feed_elements(driver)
        print("Posts processed:", num_processed_posts)
    finally:
        # Always close the browser, even when scraping fails midway.
        driver.quit()
def process_post(post_text):
    """Write a post's text to ``confessions/<fourth line>.txt``.

    The post text is split on newlines and its fourth line is used as the
    file name (presumably the confession's identifier or title -- verify
    against the page layout). Posts with three or fewer lines are skipped.

    Args:
        post_text: Full text of a single post.
    """
    split_text = post_text.split("\n")
    if len(split_text) <= 3:
        return

    # The name comes from scraped content, so neutralise characters that
    # would change the target directory or make the path invalid.
    safe_name = split_text[3]
    for unsafe_char in ("/", "\\", "\0"):
        safe_name = safe_name.replace(unsafe_char, "_")

    os.makedirs("confessions", exist_ok=True)
    file_path = f"confessions/{safe_name}.txt"
    # Explicit UTF-8 so non-ASCII posts are written on every platform.
    with open(file_path, "w", encoding="utf-8") as f:
        f.write(post_text)
    print(f"Written to file {file_path}")
if __name__ == "__main__":
    # Script entry point: scrape the page at the URL below.
    target_page = "https://www.facebook.com/confesskgpee"
    scrape_page(target_page)
@parth-paradkar
Copy link
Author

This was written to scrape the KGP Confessions page on Facebook.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment