#!/usr/bin/python3
"""Search a Myrient directory listing for entries whose title matches a query.

The script asks for a game title and a console, downloads the listing page
configured for that console, and prints the title of every matching row.
"""
from dataclasses import dataclass
from enum import Enum
from http import HTTPStatus
from typing import List, Union

import requests
from bs4 import BeautifulSoup, NavigableString, Tag


class MyrientConsole(Enum):
    """Consoles this script can be asked about."""

    unknown = 'unknown'
    playstation_3 = 'playstation_3'
    gamecube = 'gamecube'

    @staticmethod
    def from_string(console: str) -> 'MyrientConsole':
        """Return the enum member named by *console*.

        Matching is exact after lower-casing and stripping surrounding
        whitespace. The accepted spellings are the keys of
        ``_CONSOLE_ALIASES``.

        Raises:
            NotImplementedError: if *console* is not a known spelling.
        """
        normalized = console.lower().strip()
        try:
            return _CONSOLE_ALIASES[normalized]
        except KeyError:
            raise NotImplementedError(
                f'Unsupported console {normalized}.'
            ) from None

    @staticmethod
    def to_domain(console: Union[str, 'MyrientConsole']) -> str:
        """Return the listing URL configured for *console*.

        *console* may be a user-supplied string (parsed with
        ``from_string``) or an already-resolved ``MyrientConsole`` member.

        Raises:
            NotImplementedError: if the console is unsupported, or is known
                but has no entry in ``MYRENT_DOMAINS`` (e.g. ``unknown``).
        """
        if isinstance(console, MyrientConsole):
            member = console
        else:
            member = MyrientConsole.from_string(console)
        try:
            return MYRENT_DOMAINS[member]
        except KeyError:
            raise NotImplementedError(
                f'No listing URL configured for console {member.value}.'
            ) from None


# Accepted user spellings -> enum member. Kept at module level because a
# dict assigned inside the Enum body would itself become an enum member.
_CONSOLE_ALIASES = {
    'unknown': MyrientConsole.unknown,
    'playstation_3': MyrientConsole.playstation_3,
    'playstation 3': MyrientConsole.playstation_3,
    'gamecube': MyrientConsole.gamecube,
}

# Listing page per console. The (misspelled) name is kept as-is because it is
# a module-level public name.
MYRENT_DOMAINS = {
    MyrientConsole.playstation_3: 'https://myrient.erista.me/files/No-Intro/Sony%20-%20PlayStation%203%20(PSN)%20(Content)/',
    MyrientConsole.gamecube: 'https://myrient.erista.me/files/Redump/Nintendo%20-%20GameCube%20-%20NKit%20RVZ%20[zstd-19-128k]/',
}


class ContentResponse:
    """Outcome of one HTTP fetch: the URL, the status code and the body text.

    A ``status_code`` of ``0`` means no HTTP response was received at all
    (connection error, timeout, ...); ``content`` then holds the error text.
    """

    url: str = ''
    status_code: int = 0
    content: str = ''
    # Statuses that are always treated as failures. is_failure() additionally
    # rejects every code outside the 1xx-3xx range.
    bad_requests: List[HTTPStatus] = [
        HTTPStatus.BAD_GATEWAY,
        HTTPStatus.BAD_REQUEST,
        HTTPStatus.GATEWAY_TIMEOUT,
        HTTPStatus.REQUEST_TIMEOUT,
        HTTPStatus.INTERNAL_SERVER_ERROR,
    ]

    def __init__(self, url: str, status_code: int, content: str):
        self.url = url
        self.status_code = status_code
        self.content = content

    def is_failure(self) -> bool:
        """Return True when the fetch did not yield a usable response.

        That is: the status is listed in ``bad_requests``, or it is not a
        1xx/2xx/3xx code (which covers all 4xx/5xx codes and the ``0``
        "no response" marker).
        """
        if self.status_code in self.bad_requests:
            return True
        return not 100 <= self.status_code < 400

    def is_success(self) -> bool:
        """Return the negation of ``is_failure``."""
        return not self.is_failure()


class HttpContent:
    """Fetches the body of a single URL with a browser-like User-Agent."""

    url: str = ''
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0',
    }

    def __init__(self, url: str):
        self.url = url

    def fetch_url_content(self) -> ContentResponse:
        """GET ``self.url`` (10 second timeout) and wrap the outcome.

        Never raises for request-level problems: any
        ``requests.exceptions.RequestException`` is converted into a
        ``ContentResponse``. When the exception carries an HTTP response its
        status and body are used; otherwise the status is ``0`` and the
        content is the exception text.
        """
        print(f'Making request to {self.url}.')
        try:
            response = requests.get(self.url, headers=self.headers, timeout=10)
            # Raise HTTPError for bad responses (4xx and 5xx).
            response.raise_for_status()
        except requests.exceptions.RequestException as request_error:
            failed_response = request_error.response
            # Connection errors and timeouts have no response attached.
            if failed_response is None:
                return ContentResponse(self.url, 0, str(request_error))
            return ContentResponse(
                self.url, failed_response.status_code, failed_response.text
            )
        print(f'Received response for {self.url} with status code {response.status_code}.')
        return ContentResponse(self.url, response.status_code, response.text)


class MyrientTableRow:
    """Accessors for one ``<tr>`` of the listing table."""

    table_row: Tag

    def __init__(self, table_row: Tag):
        self.table_row = table_row

    def _anchor(self, cell_attrs: dict, missing_cell_message: str) -> Tag:
        """Return the first ``<a>`` inside the first ``<td>`` matching *cell_attrs*.

        Raises:
            RuntimeError: if the cell or the anchor is missing, or either
                lookup yields something that is not a ``Tag``.
        """
        cell = self.table_row.find('td', attrs=cell_attrs)
        if cell is None:
            raise RuntimeError(missing_cell_message)
        if not isinstance(cell, Tag):
            raise RuntimeError(f'Cannot traverse instance of {type(cell).__name__}.')
        anchor = cell.find('a')
        if anchor is None:
            raise RuntimeError('Could not find a tag in table data cell.')
        if not isinstance(anchor, Tag):
            raise RuntimeError(f'Cannot traverse instance of {type(anchor).__name__}.')
        return anchor

    @staticmethod
    def _attribute(anchor: Tag, name: str) -> str:
        """Return attribute *name* of *anchor* as a single string.

        If the attribute value is a list, its first element is returned.

        Raises:
            RuntimeError: if the attribute is absent or is an empty list.
        """
        value = anchor.get(name)
        if value is None:
            raise RuntimeError(f'Could not obtain {name} from table data cell.')
        if isinstance(value, list):
            if not value:
                raise RuntimeError(f'Could not obtain {name} from table data cell.')
            return value[0]
        return value

    def link(self) -> str:
        """Return the ``href`` of the anchor in the row's ``td.link`` cell.

        Raises:
            RuntimeError: if the cell, the anchor or the ``href`` is missing.
        """
        anchor = self._anchor(
            {'class': 'link'}, 'Could not find link tag in table row.'
        )
        return self._attribute(anchor, 'href')

    def title(self) -> str:
        """Return the ``title`` attribute of the anchor in the row's first ``<td>``.

        Raises:
            RuntimeError: if the cell, the anchor or the ``title`` is missing.
        """
        anchor = self._anchor(
            {}, 'Could not find table data cells in table row.'
        )
        return self._attribute(anchor, 'title')

    def size(self) -> str:
        """Placeholder: size parsing is not implemented; always returns ''."""
        return ''

    def date(self) -> str:
        """Placeholder: date parsing is not implemented; always returns ''."""
        return ''


class MyrientPlaystation3Parser:
    """Extracts the rows of the ``<table id="list">`` element from a page."""

    content: BeautifulSoup
    query: str

    def __init__(self, content: str, query: str):
        self.content = BeautifulSoup(content, features="html.parser")
        self.query = query

    def parse_content(self) -> List[MyrientTableRow]:
        """Return one ``MyrientTableRow`` per ``<tr>`` after the first two.

        The first two rows are skipped as the header row and the file
        traversal row. No filtering by ``query`` happens here.

        Raises:
            RuntimeError: if the page has no ``<table id="list">``.
        """
        table = self.content.find('table', attrs={'id': 'list'})
        if table is None:
            raise RuntimeError('Could not find table in provided content.')
        if not isinstance(table, Tag):
            raise RuntimeError(f'Cannot traverse instance of {type(table).__name__}.')
        # skip headers and file traversal row
        return [MyrientTableRow(table_row) for table_row in table.find_all('tr')[2:]]


class MyrientGamecubeParser:
    """Placeholder parser for the GameCube listing; not implemented yet."""

    content: str
    query: str

    def __init__(self, content: str, query: str):
        self.content = content
        self.query = query

    def parse_content(self) -> List[MyrientTableRow]:
        """Return an empty list (GameCube parsing is not implemented)."""
        return []


@dataclass
class ConsoleParser:
    """Picks the parser for ``console`` and filters its rows by ``query``."""

    console: MyrientConsole
    content: ContentResponse
    query: str

    @property
    def results(self) -> List[MyrientTableRow]:
        """Return the parsed rows whose title contains ``query``.

        The comparison is a case-insensitive substring match on the stripped
        query and the stripped row title.

        Raises:
            RuntimeError: if no parser exists for ``console``, or a row's
                title cannot be read.
        """
        print(f'Parsing console {self.console}.')
        if self.console == MyrientConsole.playstation_3:
            parser = MyrientPlaystation3Parser(self.content.content, self.query)
        elif self.console == MyrientConsole.gamecube:
            parser = MyrientGamecubeParser(self.content.content, self.query)
        else:
            print(f'No parser found for {self.console}.')
            raise RuntimeError(f'Unsupported console: {self.console}')
        parsed_results = parser.parse_content()
        # Normalise the query once rather than once per row.
        needle = self.query.lower().strip()
        return [
            result for result in parsed_results
            if needle in result.title().lower().strip()
        ]


def main():
    """ entry point for script """
    game_title = input('Please input your game title: ')
    game_console = input('Please input the console: ')
    # Resolve the console once and reuse it for both the URL and the parser.
    console = MyrientConsole.from_string(game_console)
    http_content_response = HttpContent(MyrientConsole.to_domain(console)).fetch_url_content()
    if http_content_response.is_failure():
        raise RuntimeError(f'Failure to obtain response content. Status Code: {http_content_response.status_code}, Message: {http_content_response.content}')
    parsed_myrient_results = ConsoleParser(console, http_content_response, game_title).results
    for query_result in parsed_myrient_results:
        print(query_result.title())


if __name__ == '__main__':
    main()