carlosmoran97 · March 29, 2020 05:20
diff --git a/datos_elecciones.py b/datos_elecciones.py
 import pandas as pd
 import requests, lxml
 from bs4 import BeautifulSoup

 url = "https://www.tse.gob.sv/2018/escrutinio-final/asamblea/index.html"
 html = requests.get(url).text
 soup =  BeautifulSoup(html, 'lxml')
 BASE_URL = "https://www.tse.gob.sv/2018/escrutinio-final/asamblea"
 panels = soup.findAll("div", {"class": "panel-purple"})

 deptos_urls = []


 def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
                       truncate_sheet=False, 
                       **to_excel_kwargs):
    """
    Append a DataFrame [df] to existing Excel file [filename]
    into [sheet_name] Sheet.
    If [filename] doesn't exist, then this function will create it.

    Parameters:
      filename : File path or existing ExcelWriter
                 (Example: '/path/to/file.xlsx')
      df : dataframe to save to workbook
      sheet_name : Name of sheet which will contain DataFrame.
                   (default: 'Sheet1')
      startrow : upper left cell row to dump data frame.
                 Per default (startrow=None) calculate the last row
                 in the existing DF and write to the next row...
      truncate_sheet : truncate (remove and recreate) [sheet_name]
                       before writing DataFrame to Excel file
      to_excel_kwargs : arguments which will be passed to `DataFrame.to_excel()`
                        [can be dictionary]

    Returns: None
    """
    from openpyxl import load_workbook

    import pandas as pd

    # ignore [engine] parameter if it was passed
    if 'engine' in to_excel_kwargs:
        to_excel_kwargs.pop('engine')

    writer = pd.ExcelWriter(filename, engine='openpyxl')

    # Python 2.x: define [FileNotFoundError] exception if it doesn't exist 
    try:
        FileNotFoundError
    except NameError:
        FileNotFoundError = IOError


    try:
        # try to open an existing workbook
        writer.book = load_workbook(filename)

        # get the last row in the existing Excel sheet
        # if it was not specified explicitly
        if startrow is None and sheet_name in writer.book.sheetnames:
            startrow = writer.book[sheet_name].max_row

        # truncate sheet
        if truncate_sheet and sheet_name in writer.book.sheetnames:
            # index of [sheet_name] sheet
            idx = writer.book.sheetnames.index(sheet_name)
            # remove [sheet_name]
            writer.book.remove(writer.book.worksheets[idx])
            # create an empty sheet [sheet_name] using old index
            writer.book.create_sheet(sheet_name, idx)

        # copy existing sheets
        writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
    except FileNotFoundError:
        # file does not exist yet, we will create it
        pass

    if startrow is None:
        startrow = 0

    # write out the new sheet
    df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

    # save the workbook
    writer.save()


 for panel in panels:
    deptos_urls.append(str(panel.findAll('a')[1]['href']))

 for panel in panels:
    df = pd.concat(pd.read_html(str(panel)))
    append_df_to_excel("data.xlsx", df, "DEP - " + str(panel.findAll('span')[1].text.split(" (")[0]))


 for url in deptos_urls:
    html = requests.get("{}/{}".format(BASE_URL, url)).text
    soup =  BeautifulSoup(html, 'lxml')
    panels = soup.findAll("div", {"class": "panel-purple"})
    for panel in panels:
        #print()
        df = pd.concat(pd.read_html(str(panel)))
        append_df_to_excel("data.xlsx", df, "MUN - " + str(panel.findAll('span')[1].text.split(" (")[0]))
	import pandas as pd
	import requests, lxml
	from bs4 import BeautifulSoup

	url = "https://www.tse.gob.sv/2018/escrutinio-final/asamblea/index.html"
	html = requests.get(url).text
	soup = BeautifulSoup(html, 'lxml')
	BASE_URL = "https://www.tse.gob.sv/2018/escrutinio-final/asamblea"
	panels = soup.findAll("div", {"class": "panel-purple"})

	deptos_urls = []


	def append_df_to_excel(filename, df, sheet_name='Sheet1', startrow=None,
	truncate_sheet=False,
	**to_excel_kwargs):
	"""
	Append a DataFrame [df] to existing Excel file [filename]
	into [sheet_name] Sheet.
	If [filename] doesn't exist, then this function will create it.

	Parameters:
	filename : File path or existing ExcelWriter
	(Example: '/path/to/file.xlsx')
	df : dataframe to save to workbook
	sheet_name : Name of sheet which will contain DataFrame.
	(default: 'Sheet1')
	startrow : upper left cell row to dump data frame.
	Per default (startrow=None) calculate the last row
	in the existing DF and write to the next row...
	truncate_sheet : truncate (remove and recreate) [sheet_name]
	before writing DataFrame to Excel file
	to_excel_kwargs : arguments which will be passed to `DataFrame.to_excel()`
	[can be dictionary]

	Returns: None
	"""
	from openpyxl import load_workbook

	import pandas as pd

	# ignore [engine] parameter if it was passed
	if 'engine' in to_excel_kwargs:
	to_excel_kwargs.pop('engine')

	writer = pd.ExcelWriter(filename, engine='openpyxl')

	# Python 2.x: define [FileNotFoundError] exception if it doesn't exist
	try:
	FileNotFoundError
	except NameError:
	FileNotFoundError = IOError


	try:
	# try to open an existing workbook
	writer.book = load_workbook(filename)

	# get the last row in the existing Excel sheet
	# if it was not specified explicitly
	if startrow is None and sheet_name in writer.book.sheetnames:
	startrow = writer.book[sheet_name].max_row

	# truncate sheet
	if truncate_sheet and sheet_name in writer.book.sheetnames:
	# index of [sheet_name] sheet
	idx = writer.book.sheetnames.index(sheet_name)
	# remove [sheet_name]
	writer.book.remove(writer.book.worksheets[idx])
	# create an empty sheet [sheet_name] using old index
	writer.book.create_sheet(sheet_name, idx)

	# copy existing sheets
	writer.sheets = {ws.title:ws for ws in writer.book.worksheets}
	except FileNotFoundError:
	# file does not exist yet, we will create it
	pass

	if startrow is None:
	startrow = 0

	# write out the new sheet
	df.to_excel(writer, sheet_name, startrow=startrow, **to_excel_kwargs)

	# save the workbook
	writer.save()


	for panel in panels:
	deptos_urls.append(str(panel.findAll('a')[1]['href']))

	for panel in panels:
	df = pd.concat(pd.read_html(str(panel)))
	append_df_to_excel("data.xlsx", df, "DEP - " + str(panel.findAll('span')[1].text.split(" (")[0]))


	for url in deptos_urls:
	html = requests.get("{}/{}".format(BASE_URL, url)).text
	soup = BeautifulSoup(html, 'lxml')
	panels = soup.findAll("div", {"class": "panel-purple"})
	for panel in panels:
	#print()
	df = pd.concat(pd.read_html(str(panel)))
	append_df_to_excel("data.xlsx", df, "MUN - " + str(panel.findAll('span')[1].text.split(" (")[0]))
No results found