Created
November 9, 2023 10:13
-
-
Save cboulanger/dc369a8f29ec0c46f4e2ef764d749807 to your computer and use it in GitHub Desktop.
Revisions
-
cboulanger created this gist
Nov 9, 2023. There are no files selected for viewing.
"""Scrape an EasyChair "smart program" conference site and build an HTML
overview of all sessions grouped by track.

Pipeline:
  1. download_session_data() parses the program page into conference_data.csv
  2. create_track_overview() joins that CSV with the track-title page and
     writes sessions_by_track.html

Requires third-party packages: requests, beautifulsoup4, dateparser.
"""

import csv
import re
from collections import defaultdict
from datetime import datetime

import dateparser
import requests
from bs4 import BeautifulSoup

# Replace <conference_name> with the actual EasyChair conference identifier.
url = "https://easychair.org/smart-program/<conference_name>/program.html"
track_url = "https://easychair.org/smart-program/<conference_name>/de_tracks.html"
css_url = "https://easychair.org/smart-program/<conference_name>/program.css"
page_title = "Conference Name - Tracks und Sessions"

# Single source of truth for the date format written to / read from the CSV.
DATE_FORMAT = '%d.%m.%Y'


def download_session_data(url):
    """Download the program page and write one CSV row per session.

    Writes conference_data.csv with columns
    (session_id, session, date, interval, track, title).

    Raises:
        requests.HTTPError: if the program page cannot be fetched.
    """
    response = requests.get(url)
    response.raise_for_status()  # fail loudly instead of parsing an error page
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')

    with open('conference_data.csv', 'w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["session_id", "session", "date", "interval", "track", "title"])

        # The most recent <div class="date"> applies to all sessions after it.
        date = None
        for div in soup.find_all('div'):
            css_classes = div.get('class')
            if css_classes == ['date']:
                parsed = dateparser.parse(div.text)
                if parsed is not None:  # keep previous date on unparsable text
                    date = parsed.strftime(DATE_FORMAT)
            elif css_classes and 'session' in css_classes:
                anchor = div.find('a')
                heading_div = div.find('div', class_='heading')
                if anchor is None or heading_div is None:
                    continue  # malformed session div: skip instead of crashing
                session_id = (anchor.get('name') or '').replace('session:', '')
                # Heading looks like: "<interval> Session <id>: Track <n>: <title>"
                matches = re.search(
                    r'(.+)\s*Session\s*(\w+)\s*:\s*Track\s*(\w+):\s*(.+)',
                    heading_div.text)
                if matches:
                    interval, session, track, title = matches.groups()
                    writer.writerow(
                        [session_id, session, date, interval.strip(), track, title])


def create_track_overview(track_url, program_url, page_title, css_url):
    """Build sessions_by_track.html from the track page and conference_data.csv.

    Args:
        track_url: EasyChair page listing one <h3><a>title</a></h3> per track.
        program_url: program page URL used for per-session deep links.
        page_title: HTML <title> of the generated overview.
        css_url: stylesheet linked from the generated overview.

    Raises:
        requests.HTTPError: if the track page cannot be fetched.
    """
    response = requests.get(track_url)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser', from_encoding='utf-8')

    # Map track number (as string) -> track title, in page order.
    tracks = {}
    track_number = 1
    for h3 in soup.find_all('h3'):
        a = h3.find('a')
        if a:
            tracks[str(track_number)] = a.text.strip()
            track_number += 1

    # Group CSV rows by track, keeping only complete rows.
    sessions = defaultdict(list)
    with open('conference_data.csv', 'r', newline='', encoding='utf-8') as file:
        reader = csv.reader(file)
        next(reader)  # skip header row
        for session_id, session, date, interval, track, title in reader:
            if session_id and track:
                sessions[track].append((date, interval, title, session_id, session))

    def chronological_key(row):
        # Sort by the real date, then interval. A plain string sort on
        # '%d.%m.%Y' would compare the day first (01.12. before 02.11.).
        date, interval = row[0], row[1]
        try:
            return (datetime.strptime(date, DATE_FORMAT), interval)
        except (TypeError, ValueError):
            return (datetime.max, interval)  # undated rows sort last

    for track in sessions:
        sessions[track].sort(key=chronological_key)

    def track_order(track):
        # Numeric tracks in numeric order (2 before 10); others after, A-Z.
        return (0, int(track), '') if track.isdigit() else (1, 0, track)

    # Assemble the page as a list of fragments; join once at the end.
    parts = [
        '<html><head>',
        '<meta charset="UTF-8">',
        f'<title>{page_title}</title>',
        f'<link rel="stylesheet" type="text/css" href="{css_url}">',
        '<style>td { padding-right: 20px; }</style>',
        '</head><body>',
    ]
    for track in sorted(sessions.keys(), key=track_order):
        # Fall back to a generic heading when the track page lacks this number.
        parts.append(f'<h2>{tracks.get(track, f"Track {track}")}</h2>')
        parts.append('<table style="">')
        for date, interval, title, session_id, session in sessions[track]:
            parts.append(
                f'<tr><td>{date}</td><td>{interval}</td><td>{session}</td>'
                f'<td><a href="{program_url}#session:{session_id}">{title}</a></td></tr>')
        parts.append('</table>')
    parts.append('</body></html>')

    with open('sessions_by_track.html', 'w', encoding='utf-8') as file:
        file.write(''.join(parts))


if __name__ == "__main__":
    download_session_data(url)
    create_track_overview(track_url, url, page_title=page_title, css_url=css_url)
#%%