Last active
June 10, 2023 08:48
-
-
Save hexianga/9f1ce67fd2325a628e027f2fee15b624 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import axios from 'axios'; | |
| import * as cheerio from 'cheerio'; | |
| import * as iconvLite from 'iconv-lite'; | |
/**
 * Downloads `webUrl` and returns the page parsed by cheerio, decoding the raw
 * bytes with the character set the page declares.
 *
 * Charset resolution order:
 *  1. the `charset` parameter of the HTTP `Content-Type` response header;
 *  2. `<meta charset="…">`;
 *  3. `<meta http-equiv="Content-Type" content="…; charset=…">`;
 *  4. UTF-8 when nothing usable is declared.
 *
 * Labels that iconv-lite does not recognise are treated as "not declared"
 * instead of being passed to `decode`, which would throw and discard the page.
 *
 * @param webUrl Absolute URL of the page to download.
 * @returns The loaded cheerio instance, or `undefined` when the request or
 *          parsing fails (the error is logged, never thrown).
 */
export const fetchHtml = async (webUrl: string) => {
  // Trims whitespace/quotes, lower-cases, and rejects labels iconv-lite cannot decode.
  const toUsableCharset = (label?: string): string | undefined => {
    const cleaned = label
      ?.trim()
      .replace(/^["']+|["']+$/g, '')
      .toLowerCase();
    return cleaned && iconvLite.encodingExists(cleaned) ? cleaned : undefined;
  };

  try {
    const { data, headers } = await axios.get(webUrl, {
      // Keep the raw bytes so the body can be decoded with the right charset.
      responseType: 'arraybuffer',
      headers: {
        'User-Agent':
          'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36',
      },
    });

    // Wrap the response once and reuse it for every decode attempt.
    const rawBytes = Buffer.from(data);

    const contentType: unknown = headers['content-type'];
    const headerCharset = toUsableCharset(
      getCharset(typeof contentType === 'string' ? contentType : undefined),
    );

    let html = iconvLite.decode(rawBytes, headerCharset ?? 'utf-8');
    let $ = cheerio.load(html, null, false);

    if (!headerCharset) {
      // The header gave no usable charset: sniff the markup from the
      // provisional UTF-8 decode.
      const parsed = $;
      const httpEquivContent = parsed('meta[http-equiv]')
        // The http-equiv value is matched case-insensitively.
        .filter(
          (_, el) =>
            parsed(el).attr('http-equiv')?.toLowerCase() === 'content-type',
        )
        .attr('content');

      const metaCharset =
        toUsableCharset(parsed('meta[charset]').attr('charset')) ??
        toUsableCharset(getCharset(httpEquivContent));

      // Re-decode only when the page declares something other than UTF-8.
      if (metaCharset && metaCharset !== 'utf8' && metaCharset !== 'utf-8') {
        html = iconvLite.decode(rawBytes, metaCharset);
        $ = cheerio.load(html, null, false);
      }
    }

    return $;
  } catch (error: unknown) {
    // Best-effort contract: callers receive `undefined`, but the cause is logged.
    console.error(`fetchHtml: failed to load ${webUrl}`, error);
    return;
  }
};
/**
 * Extracts the value of the `charset` parameter from a Content-Type style
 * string such as `text/html; charset=utf-8`.
 *
 * Only the charset token itself is returned: surrounding quotes, whitespace
 * around `=`, and any parameters that follow (e.g. `; boundary=…`) are excluded.
 *
 * @param str Content-Type header value or `<meta content>` value; may be absent.
 * @returns The charset label exactly as written (case preserved), or
 *          `undefined` when `str` is empty or has no `charset=` parameter.
 */
const getCharset = (str?: string): string | undefined => {
  if (!str) return;
  // Optional opening quote, then everything up to whitespace, ';', ',' or a quote.
  return /charset\s*=\s*["']?([^\s;"',]+)/i.exec(str)?.[1];
};
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import * as path from 'path'; | |
| import { fetchHtml } from './fetch-html'; | |
/** Metadata collected for a web page by `fetchWebMetaInfo`. */
export interface WebMetaInfo {
  /** The URL that was requested. */
  url: string;
  /** Text of the page's `<title>` element. */
  title?: string;
  /** Page description. */
  description?: string;
  /** `content` of the page's keywords meta tag. */
  keyword?: string;
  /** URL of the page's icon. */
  favicon?: string;
  /** URL of the page's Open Graph image. */
  image?: string;
}
/**
 * Turns the `href` of a page's icon `<link>` into an absolute URL.
 *
 * - A missing/empty href falls back to `<origin>/favicon.ico`.
 * - `http(s)://` and `data:` hrefs are returned untouched.
 * - Everything else (`/x`, `./x`, `../x`, `x`, `//host/x`) is resolved against
 *   the page URL with the WHATWG URL resolver. `path.join` must not be used
 *   for this: it collapses `https://` to `https:/` and emits backslashes on
 *   Windows.
 */
const resolveFaviconUrl = (href: string | undefined, pageUrl: URL): string => {
  const fallback = pageUrl.origin + '/favicon.ico';
  if (!href) return fallback;
  if (/^((https?:\/\/)|(data:))/.test(href)) return href;
  try {
    return new URL(href, pageUrl).href;
  } catch {
    // Unparseable href: use the conventional location instead.
    return fallback;
  }
};

/**
 * Fetches `webUrl` and extracts basic metadata from its markup: title,
 * description, keywords, favicon and Open Graph image.
 *
 * @param webUrl Absolute URL of the page to inspect.
 * @returns The collected metadata, or `undefined` when the page could not be
 *          loaded or an error occurred while extracting (the error is logged).
 */
export const fetchWebMetaInfo = async (
  webUrl: string,
): Promise<WebMetaInfo | undefined> => {
  try {
    const $ = await fetchHtml(webUrl);
    if (!$) return;

    const pageUrl = new URL(webUrl);
    const title = $('title').text();

    // Open Graph protocol: https://ogp.me/
    // The impact of Open Graph tags on SEO: https://cloud.tencent.com/developer/article/1617791
    const image = $(
      'meta[property*="og:image"], meta[property*="og:image:url"], meta[property*="og:image:secure_url"]',
    ).attr('content');
    const keyword = $('meta[name*="keywords"]').attr('content');

    const favicon = resolveFaviconUrl(
      $('link[rel*="icon"]').attr('href'),
      pageUrl,
    );

    // `||` on purpose: an empty description attribute also triggers the
    // fallback to the first 100 characters of the body text.
    const description =
      $('meta[name*="description"], meta[property*="og:description"]').attr(
        'content',
      ) || $('body').text().slice(0, 100);

    return {
      url: webUrl,
      title,
      description,
      keyword,
      favicon,
      image,
    };
  } catch (error: unknown) {
    console.log(error);
    return;
  }
};
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment