# AID character table: the 64 symbols used as base-64 digits, in value order.
_AID_TABLE = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz-_'


def _to_aid_digits(value: int, width: int) -> str:
    """Return the lowest ``width`` base-64 digits of ``value`` using the AID table."""
    digits = []
    for _ in range(width):
        # Pure integer arithmetic: float division (``/``) loses precision for
        # large values and overflows for very long digit strings.
        value, index = divmod(value, 64)
        digits.append(_AID_TABLE[index])
    # Digits were produced least-significant first.
    return ''.join(reversed(digits))


def get_aid_from_url(url: str) -> (str, str):
    """Extract the board name and article ID (AID) from a PTT web article URL.

    Ported from ``get_aid_from_url`` in PyPtt.  The article file name has the
    form ``M.<decimal>.A.<hex>.html`` (the ``.<hex>`` part may be absent).
    The decimal part is encoded as 6 base-64 digits and the hex part as 2
    base-64 digits; when the hex part is absent the last two digits are
    ``'00'``.

    Algorithm reference:
    https://www.ptt.cc/man/C_Chat/DE98/DFF5/DB61/M.1419434423.A.DF0.html

    Args:
        url: Text containing a ``https://www.ptt.cc/bbs/<board>/M....html``
            article URL.  Text before the URL and anything after ``.html``
            (query string, fragment) is ignored.

    Returns:
        A ``(board, aid)`` tuple, where ``aid`` is the 8-character article ID.

    Raises:
        ValueError: If ``url`` does not contain a PTT article URL.
    """
    # Check that the input contains a PTT BBS article URL.  The board and the
    # numeric parts are captured directly from the match, so their extraction
    # does not depend on fixed string offsets or on where the last '/' is.
    # Raw string + escaped dots: '.' must match a literal dot only.
    match = re.search(
        r'https://www\.ptt\.cc/bbs/([-.\w]+)/M\.(\d+)\.A(?:\.([0-9A-Fa-f]+))?\.html',
        url,
    )
    if match is None:
        raise ValueError('url must be www.ptt.cc article url')

    board, dec_part, hex_part = match.groups()

    aid_0 = _to_aid_digits(int(dec_part), 6)  # decimal part
    if hex_part is None:
        aid_1 = '00'
    else:
        aid_1 = _to_aid_digits(int(hex_part, 16), 2)  # hexadecimal part

    return board, f'{aid_0}{aid_1}'