import axios from 'axios';
import * as cheerio from 'cheerio';
import * as iconvLite from 'iconv-lite';

/** Encoding used when neither the HTTP header nor the markup names a usable charset. */
const DEFAULT_CHARSET = 'utf-8';

/**
 * Downloads a web page and returns it loaded into cheerio, decoding the bytes
 * with the charset the page declares.
 *
 * Charset resolution order:
 *  1. the `charset` parameter of the `Content-Type` response header;
 *  2. `<meta charset="...">` in the markup;
 *  3. `<meta http-equiv="Content-Type" content="...charset=...">`;
 *  4. UTF-8 as the fallback.
 *
 * Labels that iconv-lite does not recognise are ignored rather than allowed to
 * abort the whole fetch.
 *
 * @param webUrl - Absolute URL of the page to download.
 * @returns The cheerio instance for the decoded markup (loaded as a fragment,
 *   i.e. `isDocument = false`), or `undefined` if the request or parsing fails
 *   for any reason.
 */
export const fetchHtml = async (
  webUrl: string,
): Promise<ReturnType<typeof cheerio.load> | undefined> => {
  try {
    const { data, headers } = await axios.get(webUrl, {
      // Raw bytes are required so that decoding can be done with the right charset.
      responseType: 'arraybuffer',
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
      },
    });

    // Build the Buffer once; it may be decoded a second time below.
    const rawBytes = Buffer.from(data);

    // Header values are not guaranteed to be strings, so guard before parsing.
    const contentType: unknown = headers['content-type'];
    const headerCharset = supportedCharset(
      getCharset(typeof contentType === 'string' ? contentType : undefined),
    );

    const firstPass = cheerio.load(
      iconvLite.decode(rawBytes, headerCharset ?? DEFAULT_CHARSET),
      null,
      false,
    );
    if (headerCharset) {
      return firstPass;
    }

    // No usable charset in the header: sniff the markup. `||` (not `??`) is
    // deliberate so that an empty `charset=""` attribute falls through to the
    // http-equiv variant.
    const metaCharset = supportedCharset(
      firstPass('meta[charset]').attr('charset') ||
        getCharset(firstPass('meta[http-equiv="Content-Type"]').attr('content')),
    )?.toLowerCase();

    // The first pass already decoded as UTF-8, so only re-decode for other encodings.
    if (metaCharset && metaCharset !== 'utf8' && metaCharset !== 'utf-8') {
      return cheerio.load(iconvLite.decode(rawBytes, metaCharset), null, false);
    }
    return firstPass;
  } catch {
    // Best-effort contract: any network, decoding or parsing failure yields `undefined`.
    return undefined;
  }
};

/**
 * Extracts the `charset` parameter from a `Content-Type`-style value.
 *
 * Only the charset token itself is returned: surrounding quotes are dropped and
 * the value ends at the next `;`, quote or whitespace, so trailing parameters
 * (e.g. `; boundary=...`) are not swallowed.
 *
 * @param str - Header or `<meta content>` value, e.g. `text/html; charset=gbk`.
 * @returns The charset label as written, or `undefined` when none is present.
 */
const getCharset = (str?: string): string | undefined => {
  if (!str) return undefined;
  return /charset\s*=\s*["']?([^\s;"']+)/i.exec(str)?.[1];
};

/**
 * Returns `label` when iconv-lite can decode it, otherwise `undefined`, so that
 * an unknown or malformed label never reaches `iconvLite.decode` (which throws).
 */
const supportedCharset = (label?: string): string | undefined =>
  label && iconvLite.encodingExists(label) ? label : undefined;