Gist by @rodjul, created September 26, 2022 18:08
Slide Share download templates
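A Python 3 script that logs in to a SlideModel account and bulk-downloads every template listed at https://slidemodel.com/templates/, sorting the files into a local `templates/<category>/` tree. It depends on `requests` and `beautifulsoup4` (`pip install requests beautifulsoup4`); the commented-out `proxy` dict can be pointed at a local intercepting proxy on 127.0.0.1:8080 for debugging.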
import requests
from bs4 import BeautifulSoup
from getpass import getpass
import os
import threading

headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.150 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Language": "pt-BR,pt;q=0.8,en-US;q=0.5,en;q=0.3",
    "Accept-Encoding": "gzip, deflate",
    "Connection": "close",
    "Referer": "https://slidemodel.com/",
    "Upgrade-Insecure-Requests": "1"
}

# proxy = {"http": "http://127.0.0.1:8080", "https": "http://127.0.0.1:8080"}
proxy = {}
session = requests.Session()

folder = "templates"
if not os.path.exists(folder):
    os.mkdir(folder)
def get_length_pages(anchors) -> int:
    '''
    Parse the pagination nav on https://slidemodel.com/templates/
    Returns:
        total number of pages
    '''
    nav_pages = [a for a in anchors if a.attrs.get('class') and a.attrs['class'][0] == "page-numbers"]
    # the last "page-numbers" anchor holds the highest page number
    return int(nav_pages[-1].text)
def get_template_items(anchors) -> list:
    '''
    Collect all template items (12 per page)
    '''
    return [a for a in anchors if a.attrs.get('class') and a.attrs['class'][0] == "item-thumbnail"]
def download_template(page, total_pages, item):
    url = item.attrs['href']
    template_page = session.get(url, headers=headers)
    template_soup = BeautifulSoup(template_page.content, 'html.parser')

    # the download form carries two hidden inputs that identify the file
    inputs = template_soup.find_all("input")
    magn_ddaid = 0
    magn_ddid = 0
    for inp in inputs:
        if inp.attrs.get("name") and inp.attrs['name'] == "magn-ddaid":
            magn_ddaid = inp.attrs['value']
        if inp.attrs.get("name") and inp.attrs['name'] == "magn-ddid":
            magn_ddid = inp.attrs['value']

    # labels = template_soup.find_all("label")
    # filename = [label for label in labels if label.attrs.get("for") and label.attrs["for"][0] == magn_ddaid][0]
    label = template_soup.find("label")
    filename = label.attrs['title']
    filename = filename.replace("'", "").replace("`", "")

    category = template_soup.find_all("dd")[0].text
    category = category.rstrip()
    try:
        if not os.path.exists(folder + "/" + category):
            os.mkdir(folder + "/" + category)
    except Exception as e:
        print(e)

    try:
        print(f"Downloading item from page {page} of {total_pages} - {category}/{filename}")
        download_file = session.post("https://slidemodel.com/download/", headers=headers, data={"magn-ddid": magn_ddid, "magn-ddaid": magn_ddaid}, proxies=proxy)
        with open(folder + "/" + category + "/" + filename, "wb") as f:
            f.write(download_file.content)
    except Exception:
        print(f"ERROR: failed to download {filename} - {url}")
def main():
    # https://slidemodel.com/templates/
    # fetch pages 1 ... 258
    # -> for each <div class="item">, open it and grab the values magn-ddaid, magn-ddid,
    #    and the <span class="title"> to use as the saved filename
    url_login = "https://slidemodel.com/account/login/"
    username = input("Username: ")
    password = getpass()

    print("Getting cookies...")
    response = session.get(url_login, headers=headers, proxies=proxy)
    soup = BeautifulSoup(response.content, 'html.parser')
    # the login nonce is taken by position (8th <input> on the page), which is fragile
    rcp_login_nonce = soup.find_all("input")[7].attrs['value']

    print("Authenticating...")
    response = session.post(url_login, data={
        "rcp_user_login": username,
        "rcp_user_pass": password,
        "rcp_action": "login",
        "rcp_redirect": "https://slidemodel.com/account/login/",
        "rcp_login_nonce": rcp_login_nonce
    }, headers=headers, proxies=proxy)

    print("Checking if the login succeeded")
    # a successful login sets a wordpress_logged_in_* cookie on the session
    found = False
    for cookie in session.cookies.get_dict().keys():
        if cookie.startswith("wordpress_logged_in"):
            found = True
            break
    if not found:
        raise Exception("Invalid credentials")
    print("Logged in successfully")
    print("Starting to download all files")
    # first page (also used to read the total page count from the pagination nav)
    page = 1
    response = session.get("https://slidemodel.com/templates/", headers=headers, proxies=proxy)
    soup = BeautifulSoup(response.content, 'html.parser')
    anchors = soup.find_all('a')
    total_pages = get_length_pages(anchors)
    items = get_template_items(anchors)

    # one thread per template; join them all before moving to the next page
    threads = []
    for item in items:
        # download_template(page, total_pages, item)
        t = threading.Thread(target=download_template, args=(page, total_pages, item,))
        threads.append(t)
        t.start()
    for t in threads:
        t.join()
    page += 1

    # remaining pages
    while page <= total_pages:
        response = session.get(f"https://slidemodel.com/templates/page/{page}/", headers=headers, proxies=proxy)
        soup = BeautifulSoup(response.content, 'html.parser')
        anchors = soup.find_all('a')
        items = get_template_items(anchors)
        threads = []
        for item in items:
            # download_template(page, total_pages, item)
            t = threading.Thread(target=download_template, args=(page, total_pages, item,))
            threads.append(t)
            t.start()
        for t in threads:
            t.join()
        page += 1

if __name__ == "__main__":
    main()
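To use it, save the script (any filename works; `download_templates.py` here is only a placeholder) and run it with Python 3. It prompts for a SlideModel username and password, verifies the login via the `wordpress_logged_in` cookie, then walks every page of https://slidemodel.com/templates/, spawning one download thread per template (12 per page) and joining them before moving on, so at most one page's worth of downloads runs concurrently. Files are written to `templates/<category>/<filename>`.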