Skip to content

Instantly share code, notes, and snippets.

@RebornQ
Forked from ninehills/chatpdf-zh.ipynb
Created March 26, 2023 14:22
Show Gist options
  • Save RebornQ/5bf1d395d3122f7613bbbeaa0533596d to your computer and use it in GitHub Desktop.
Save RebornQ/5bf1d395d3122f7613bbbeaa0533596d to your computer and use it in GitHub Desktop.

Revisions

  1. @ninehills ninehills revised this gist Mar 26, 2023. 1 changed file with 84 additions and 41 deletions.
    125 changes: 84 additions & 41 deletions chatpdf-zh.ipynb
    Original file line number Diff line number Diff line change
    @@ -26,7 +26,7 @@
    {
    "cell_type": "code",
    "source": [
    "# 建议将 PDF 文件保存在 Google Drive 上,左侧 Connect to Google Drive\n",
    "# 建议将 PDF 文件保存在 Google Drive 上\n",
    "\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')"
    @@ -36,7 +36,7 @@
    "colab": {
    "base_uri": "https://localhost:8080/"
    },
    "outputId": "ab33bce1-88fd-432d-d65e-1c9892a860a8"
    "outputId": "65b23095-a911-4efa-b7f4-c9eec3a034a2"
    },
    "execution_count": 1,
    "outputs": [
    @@ -52,14 +52,19 @@
    {
    "cell_type": "code",
    "source": [
    "# 在 Google Drive 上的工作目录\n",
    "WORK_DIR = \"/content/drive/MyDrive/ChatGPT/Notebooks/ChatPDF/\"\n",
    "# env 文件名称,里面存储 OPENAI API KEY\n",
    "ENV_FILE = \".env\"\n",
    "# 待处理的源文件\n",
    "SRC_FILE = \"jianshang.pdf\"\n",
    "# 缓存的向量 index 文件\n",
    "INDEX_FILE = SRC_FILE + \".index\""
    ],
    "metadata": {
    "id": "UXw1TWw_nj_F"
    },
    "execution_count": 3,
    "execution_count": 6,
    "outputs": []
    },
    {
    @@ -74,6 +79,21 @@
    "metadata": {
    "id": "Aqef8N2RlUpo"
    },
    "execution_count": 3,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "import logging\n",
    "import sys\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)\n",
    "logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))"
    ],
    "metadata": {
    "id": "GybUa0BykExM"
    },
    "execution_count": 4,
    "outputs": []
    },
    @@ -91,17 +111,20 @@
    "metadata": {
    "id": "Vp6JcErhmt_w"
    },
    "execution_count": 20,
    "execution_count": 5,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "# Load environment variables, Just create a .env file with your OPENAI_API_KEY then load it.\n",
    "# Load environment variables (OPENAI_API_KEY)\n",
    "\n",
    "import os \n",
    "import os\n",
    "import shutil\n",
    "from dotenv import load_dotenv\n",
    "\n",
    "shutil.copyfile(os.path.join(WORK_DIR, ENV_FILE), \".env\")\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "# API configuration\n",
    @@ -144,26 +167,28 @@
    "\n",
    "if os.path.exists(index_file) == False:\n",
    " documents = loader.load_data(file=os.path.join(Path(WORK_DIR), Path(SRC_FILE)))\n",
    " index = GPTSimpleVectorIndex(documents)\n",
    " # 默认 chunk_size_limit=4096,缩减 chunk_size 可以有效降低 Token 使用,但是会导致最终提供给 GPT 的上下文变少,从而影响效果\n",
    " index = GPTSimpleVectorIndex(documents, chunk_size_limit=1024)\n",
    " index.save_to_disk(index_file)\n",
    "else:\n",
    " index = GPTSimpleVectorIndex.load_from_disk(index_file)\n"
    ],
    "metadata": {
    "id": "Cb98YMtrnTxU"
    },
    "execution_count": 9,
    "execution_count": 8,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.2, model_name=\"gpt-3.5-turbo\"))\n",
    "llm_predictor_stream = LLMPredictor(llm=ChatOpenAI(\n",
    " temperature=0.2, model_name=\"gpt-3.5-turbo\", stream=True,\n",
    " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True\n",
    " )\n",
    ")\n",
    "llm_predictor = LLMPredictor(llm=ChatOpenAI(\n",
    " # temperature 取值范围为 0-2(常用 0-1),越接近 0 输出越确定、越保守,值越大越具备创造性\n",
    " # 典型值:0(arc53/DocsGPT)、0.2(madawei2699/myGPTReader)\n",
    " temperature=0,\n",
    " model_name=\"gpt-3.5-turbo\",\n",
    "))\n",
    "\n",
    "\n",
    "QUESTION_ANSWER_PROMPT_TMPL = (\n",
    " \"Context information is below. \\n\"\n",
    @@ -177,6 +202,7 @@
    "You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.\n",
    "If you can't find the answer in the context below, just say \"Hmm, I'm not sure.\" Don't try to make up an answer.\n",
    "If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.\n",
    "\n",
    "Context information is below.\n",
    "=========\n",
    "{context_str}\n",
    @@ -187,6 +213,9 @@
    "QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL_2)\n",
    "\n",
    "def chat(query):\n",
    " # 当 chunk_size 较小且问题较为简短时,回答所用的语言不太好控制,需要在问题中追加语言要求。\n",
    " # 目前在 prompt 模板上多次尝试均无效,所以直接追加到 query 末尾。\n",
    " query = query + \" 请使用中文回答。\"\n",
    " result = index.query(\n",
    " query,\n",
    " llm_predictor=llm_predictor,\n",
    @@ -216,18 +245,21 @@
    "def chat_stream(query):\n",
    " return index.query(\n",
    " query,\n",
    " llm_predictor=llm_predictor_stream,\n",
    " llm_predictor=llm_predictor,\n",
    " text_qa_template=QUESTION_ANSWER_PROMPT,\n",
    " response_mode=\"tree_summarize\",\n",
    " similarity_top_k=3,\n",
    " streaming=True,\n",
    " mode=\"embedding\",\n",
    " )"
    " )\n",
    "\n",
    "# response_stream = chat_stream(\"这本书讲了什么?\")\n",
    "# response_stream.print_response_stream()"
    ],
    "metadata": {
    "id": "6ddjxclno8tg"
    },
    "execution_count": 28,
    "execution_count": 64,
    "outputs": []
    },
    {
    @@ -239,18 +271,18 @@
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 410
    "height": 457
    },
    "id": "psvZXfWirq31",
    "outputId": "16c58a16-c570-4b6a-c6cf-8552ddbf8828"
    "outputId": "a90d5eb2-8db5-4cbe-d651-693b84deb5f3"
    },
    "execution_count": 29,
    "execution_count": 65,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Token used: 10843, total used: 10843\n"
    "Token used: 4486, total used: 4486\n"
    ]
    },
    {
    @@ -259,7 +291,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 这本书主要讲述了商周之变的历史背景和周朝的兴起,以及商文化和周文化的差异。它还提到了一些关于人祭习俗的历史知识,并分享了作者的研究经历和认知"
    "text/markdown": "**`Final Response:`** 这本书讲述了商周变革的历史背景和过程,包括周灭商等事件,同时也描绘了许多熟视无睹的场景,让读者可以更好地理解古代中国的思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则等方面。根据历史学家和教授的评价,这场变革对于华夏文明的意义更深、更远,是中国历史上的重要事件。作者的视角和写法独特,让人耳目一新。对于对古代中国有兴趣的研究者或普通读者来说,是一个很好的探索起点"
    },
    "metadata": {}
    },
    @@ -289,7 +321,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8000785245735972<br>**Text:** 如《象传》和《彖传》可能是周公作品。其他篇章里常出现“子日”,孔子 \n自己肯定不会这样写,它们应当是孔门弟子编写的。《周易》经传的详细知识, \n可参考廖明春《周易经传十五讲》,北京大学出版社,2...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8053380387024119<br>**Text:** 在视频和音频节目中,呈现得肯定都是有限的。 说起来,李硕在本书中所描述的,都是我这个在新石器时代至夏 商周考古领域熬至“资深”的学者所耳熟能详的,但他的视角和写法 却又使我耳目一新:他赋予了我...<br>"
    },
    "metadata": {}
    },
    @@ -319,7 +351,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7965259185103453<br>**Text:** 的支持,其实是心理上的,让我意识到除了祭祀坑里的尸骨,这世界 上还有别的东西。 也许,人不应当凝视深渊;虽然深渊就在那里。 \f \f 始于一页,抵达世界 Humanities ■ Histor...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8041932654562923<br>**Text:** 的支持,其实是心理上的,让我意识到除了祭祀坑里的尸骨,这世界 上还有别的东西。 也许,人不应当凝视深渊;虽然深渊就在那里。 \f \f 始于一页,抵达世界 Humanities ■ Histor...<br>"
    },
    "metadata": {}
    },
    @@ -349,12 +381,23 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7947722920317227<br>**Text:** 书》则是“太姒梦见商之庭产棘”。此事应载于《逸周书•程寤》篇,但传 \n世本只存篇名,正文缺。参见黄怀信等《逸周书汇校集注》(修订本),上海 \n古籍出版社,2007年,第262、1141页;李学勤...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.7992281260807114<br>**Text:** 们再次进入幽暗的历史通道前,一窺我们这群人何以如此,何以至今。它将予我们 \n鼓励,认识自己,直面未来。\n\n刘苏里万圣书园创办人\n\n-部好的历史著作,不仅要数学家的逻辑,迁要文学家的想象。由此观之...<br>"
    },
    "metadata": {}
    }
    ]
    },
    {
    "cell_type": "markdown",
    "source": [
    "从下面这个问题的回答可以看到,当 chunk_size 从默认的 4096 缩减为 1024 后,因为缺乏上下文,回答不够完整,只包含牧野之战的前半部分。缩减之前(chunk_size 为 4096 时)的回答如下:\n",
    "\n",
    "在牧野之战开始时,武王率领西土联军面对着数量远超自己的商军。武王的前提是有殷都内部联络人的密约,但局势不断变化,没有商人助战,西土联军将被一边倒地屠杀。武王没有别的选择,他只能相信父亲描述的那位上帝站在自己一边,只要全心信任他,父亲开启的翦商事业就能成功。在战斗开始时,武王一方没有任何章法和战术可言,但商军阵列却突然自行解体,变成了互相砍杀的人群。或许是看到周军义无反顾的冲锋,商军中的密谋者终于鼓起勇气,倒戈杀向纣王中军。接着,西土联军全部投入了混战。后世的周人史诗说,“商庶若化\",即是说,商军队伍就像滚水冲刷的油脂,瞬间溃散,融化。最终,武王率领的西土联军获胜,商王朝终结。"
    ],
    "metadata": {
    "id": "NIF3KL1ttYhw"
    }
    },
    {
    "cell_type": "code",
    "source": [
    @@ -363,18 +406,18 @@
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 544
    "height": 445
    },
    "id": "bS5LAJkuqR4U",
    "outputId": "18515036-427e-4e63-fd10-2eda66aca4ce"
    "outputId": "ca529dfd-7137-43f6-fcf7-6f90c44fca4c"
    },
    "execution_count": 31,
    "execution_count": 66,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Token used: 14013, total used: 37002\n"
    "Token used: 4788, total used: 9274\n"
    ]
    },
    {
    @@ -383,7 +426,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 在牧野之战开始时,武王率领西土联军面对着数量远超自己的商军。武王的前提是有殷都内部联络人的密约,但局势不断变化,没有商人助战,西土联军将被一边倒地屠杀。武王没有别的选择,他只能相信父亲描述的那位上帝站在自己一边,只要全心信任他,父亲开启的翦商事业就能成功。在战斗开始时,武王一方没有任何章法和战术可言,但商军阵列却突然自行解体,变成了互相砍杀的人群。或许是看到周军义无反顾的冲锋,商军中的密谋者终于鼓起勇气,倒戈杀向纣王中军。接着,西土联军全部投入了混战。后世的周人史诗说,“商庶若化\",即是说,商军队伍就像滚水冲刷的油脂,瞬间溃散,融化。最终,武王率领的西土联军获胜,商王朝终结。"
    "text/markdown": "**`Final Response:`** 根据新的上下文,牧野之战的具体过程是:盟军经过六天加急行军,于二月二十一日夜间抵达殷都南郊的牧野,两军都已侦知对方主力的位置,开始连夜整队列阵,准备天亮时一举消灭对手。二十二日甲子凌晨,规模较小的周军首先列队完毕,武王全身盔甲戎装,在阵前宣誓,这便是著名的《尚书•牧誓》。武王手持白旄,高声宣誓:“逖矣,西土之人!”,然后一一点名麾下的盟友、将领、军官,直到“百夫长”,命令他们:“拿起你们的戈,连接好你们的盾牌,立起你们的长矛,现在,我要立誓!”"
    },
    "metadata": {}
    },
    @@ -413,7 +456,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7968888490270644<br>**Text:** 王受命第十一年,5他再度起兵东征。有好几种文献记载武王此次伐\n商的行军日程,但年份和月份皆有所不同。总的来说,武王此次起兵\n是在隆冬季节,决战则是在冬末春初。\n\n总攻的前期工作在前一年底就开始了...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8107165489961021<br>**Text:** 的宗教思维,知道必须用法术对抗法术,化解纣王自我献祭可能带来 的后果与流言,方法则是表演一次战斗和处斩,展现纣王被俘和被杀 的全过程:周军直入鹿台宫,武王在战车上对着纣王尸体连射三箭, 然后跳下...<br>"
    },
    "metadata": {}
    },
    @@ -443,7 +486,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7918882416293495<br>**Text:** 并不是武王的私人属下。他们很在意这种身份区别。7\n\n天色渐明,雨势渐小,对面的商军阵列逐渐成形。周人史诗的描\n述是,敌军的戈矛像森林一样密集,所谓“殷商之旅,其会如林”。(《诗\n经•大雅•大明》...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8084797679435898<br>**Text:** 事据点可以保持有效联络,一旦某个城邑遭到土著部落威胁,周邻据 点可以尽快参战,战报也可以迅速送到殷都,以便后方组织增援力量。 马拉战车比徒步快三倍以上,这意味着传递战报和命令的时间只需原 来的四...<br>"
    },
    "metadata": {}
    },
    @@ -473,7 +516,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7853722213787273<br>**Text:** 作的“金花”。这都是王室才会有的财物,看来王室和奴隶们居住\n的地方相隔并不远。\n\n\f\n第十一章商人的思维与国家\n\n225\n\n需要注意的是,只有殷墟王宫区发现有大量集中存放的石头农具,\n其他任何商...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8057765987066442<br>**Text:** 商人据点,一路向北直指殷都。经过六天加急行军,二月二十一日癸 \n丑夜间,盟军抵达殷都南郊的牧野。6这里是商王室蓄养牛羊的草原,\n地形平坦,商军集结地的营火已经遥遥在望。此时,两军都已侦知对\n方主...<br>"
    },
    "metadata": {}
    }
    @@ -482,23 +525,23 @@
    {
    "cell_type": "code",
    "source": [
    "display_response(chat(\"被商朝献祭的人群中,分别都有哪些角色?\"))"
    "display_response(chat(\"对商朝人祭文化做一个总结。\"))"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 433
    },
    "id": "7c1Kwsj2waEB",
    "outputId": "a5383960-998d-4876-c3d0-141f5aeb71ff"
    "outputId": "15d54fca-9c21-43e4-e34f-6b0b546f3af3"
    },
    "execution_count": 33,
    "execution_count": 68,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Token used: 12048, total used: 60520\n"
    "Token used: 3115, total used: 16625\n"
    ]
    },
    {
    @@ -507,7 +550,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 在商朝献祭的人群中,有征伐周边斩获的首级的侯来、陈本等人,以及现场屠宰的牲畜,牛“五百有四”头,猪、羊等牲畜共“三千七百有一”头。其中还向天(上帝)和后稷献祭,以及向其他百神、水土之神献祭。其中有王族和方伯,以及纣王自己"
    "text/markdown": "**`Final Response:`** 商朝人祭文化是一种漫长而顽固的风习,从新石器时代晚期以来算起,已经延续两三千年,商朝更是将其吸收到了王朝制度之中。人祭是商朝的国家宗教,也是商族人的全民宗教,王室成为人祭活动最大的主办者,代表着王权和神权的高度融合。人祭行为不仅出现在宫廷与民间,也被商人带到了各殖民城邑。周公与召公的谈话中,周公认为想要根除上千年的积习,谈何容易"
    },
    "metadata": {}
    },
    @@ -537,7 +580,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.850064718034232<br>**Text:** 仪式上,首先奉献的是侯来、陈本等征伐周边斩获的首级,并 搭配现场屠宰的牲畜,“断牛六,断羊二”;然后向天(上帝)和后 稷献祭,用的是牛“五百有四”头;再向其他百神、水土之神献祭, 用猪、羊等牲畜...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8775562794009867<br>**Text:** 祭材料。 另外,商人的人祭宗教也和他们的复杂来源有关。灭夏初期,来 自多个文化的人群融合成新兴的“王朝商族”,因此,他们需要构建 一种维系自我认同的宗教文化,而用人献祭是最为明晰和便捷的方式:...<br>"
    },
    "metadata": {}
    },
    @@ -567,7 +610,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8454964094383671<br>**Text:** 之内,中土世界天翻地覆。\n\n纣王焚身而死,后世人大都将其理解为一种走投无路的自绝。其 \n实,按照商人的宗教理念,这是一场最高级的献祭--王把自己奉献 \n给了上帝和祖宗诸神。商朝开国之王成汤(天乙...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.8724770531874126<br>**Text:** 和统治列族的权柄。 在商人的人祭宗教兴盛之际,王室成为人祭活动最大的主办者。 \n\n这代表着王权和神权的高度融合。比起二里头一夏朝,这是一个新\n变化:人祭是商朝的国家宗教,也是商族人的全民宗教。...<br>"
    },
    "metadata": {}
    },
    @@ -597,7 +640,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8425104275046603<br>**Text:** 郭宝钧:《1950年春殷墟发掘报告》,第45页。\n20王平、顾彬:《甲骨文与殷商人祭》,大象出版社,2007年,第88、97页。\n21唐际根、汤毓赞:《再论殷墟人祭坑与甲骨文中羌祭卜辞的相关性》...<br>"
    "text/markdown": "**Document ID:** 07dc9745-620b-4274-90eb-0d48f1e9db20<br>**Similarity:** 0.871134373879794<br>**Text:** 其实,此时周公真正关心的问题是商人的人祭文化。商王朝虽然 终结了,但他们用人牲祭祀、奠基和殉葬的传统并没有终止;而且 武王在位期间还曾举行商式献祭,甚至比商人更变本加厉。人祭是 一种漫长而顽固的...<br>"
    },
    "metadata": {}
    }
  2. @ninehills ninehills revised this gist Mar 25, 2023. 1 changed file with 101 additions and 29 deletions.
    130 changes: 101 additions & 29 deletions chatpdf-zh.ipynb
    Original file line number Diff line number Diff line change
    @@ -36,9 +36,9 @@
    "colab": {
    "base_uri": "https://localhost:8080/"
    },
    "outputId": "3c50322c-5eef-43e1-f4a5-34084831e52e"
    "outputId": "ab33bce1-88fd-432d-d65e-1c9892a860a8"
    },
    "execution_count": null,
    "execution_count": 1,
    "outputs": [
    {
    "output_type": "stream",
    @@ -54,12 +54,12 @@
    "source": [
    "WORK_DIR = \"/content/drive/MyDrive/ChatGPT/Notebooks/ChatPDF/\"\n",
    "SRC_FILE = \"jianshang.pdf\"\n",
    "INDEX_FILE = \"jianshang.index\""
    "INDEX_FILE = SRC_FILE + \".index\""
    ],
    "metadata": {
    "id": "UXw1TWw_nj_F"
    },
    "execution_count": 1,
    "execution_count": 3,
    "outputs": []
    },
    {
    @@ -74,7 +74,7 @@
    "metadata": {
    "id": "Aqef8N2RlUpo"
    },
    "execution_count": 2,
    "execution_count": 4,
    "outputs": []
    },
    {
    @@ -84,12 +84,14 @@
    "from llama_index.response.notebook_utils import display_response\n",
    "from llama_index.prompts.prompts import QuestionAnswerPrompt\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from IPython.display import Markdown, display"
    "from IPython.display import Markdown, display\n",
    "from langchain.callbacks.base import CallbackManager\n",
    "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler"
    ],
    "metadata": {
    "id": "Vp6JcErhmt_w"
    },
    "execution_count": 5,
    "execution_count": 20,
    "outputs": []
    },
    {
    @@ -103,12 +105,15 @@
    "load_dotenv()\n",
    "\n",
    "# API configuration\n",
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")"
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")\n",
    "\n",
    "if OPENAI_API_KEY == \"\":\n",
    " raise Exception(\"Need set OPENAI_API_KEY\")"
    ],
    "metadata": {
    "id": "WKoA2bzul7Gz"
    },
    "execution_count": 3,
    "execution_count": 7,
    "outputs": []
    },
    {
    @@ -130,6 +135,8 @@
    "from pathlib import Path\n",
    "from llama_index import download_loader\n",
    "\n",
    "# 中文 PDF 建议使用 CJKPDFReader,英文建议用 PDFReader\n",
    "# 其他类型文件,请去 https://llamahub.ai/ 寻找合适的 Loader\n",
    "CJKPDFReader = download_loader(\"CJKPDFReader\")\n",
    "\n",
    "loader = CJKPDFReader()\n",
    @@ -145,13 +152,18 @@
    "metadata": {
    "id": "Cb98YMtrnTxU"
    },
    "execution_count": 7,
    "execution_count": 9,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.2, model_name=\"gpt-3.5-turbo\"))\n",
    "llm_predictor_stream = LLMPredictor(llm=ChatOpenAI(\n",
    " temperature=0.2, model_name=\"gpt-3.5-turbo\", stream=True,\n",
    " callback_manager=CallbackManager([StreamingStdOutCallbackHandler()]), verbose=True\n",
    " )\n",
    ")\n",
    "\n",
    "QUESTION_ANSWER_PROMPT_TMPL = (\n",
    " \"Context information is below. \\n\"\n",
    @@ -175,33 +187,79 @@
    "QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL_2)\n",
    "\n",
    "def chat(query):\n",
    " return index.query(\n",
    " result = index.query(\n",
    " query,\n",
    " llm_predictor=llm_predictor,\n",
    " text_qa_template=QUESTION_ANSWER_PROMPT,\n",
    " # default: For the given index, “create and refine” an answer by sequentially \n",
    " # going through each Node; make a separate LLM call per Node. Good for more \n",
    " # detailed answers.\n",
    " # compact: For the given index, “compact” the prompt during each LLM call \n",
    " # by stuffing as many Node text chunks that can fit within the maximum prompt size. \n",
    " # If there are too many chunks to stuff in one prompt, “create and refine” an answer \n",
    " # by going through multiple prompts.\n",
    " # tree_summarize: Given a set of Nodes and the query, recursively construct a \n",
    " # tree and return the root node as the response. Good for summarization purposes.\n",
    " response_mode=\"tree_summarize\",\n",
    " similarity_top_k=3,\n",
    " # mode=\"default\" will create and refine an answer sequentially through \n",
    " # the nodes of the list. \n",
    " # mode=\"embedding\" will synthesize an answer by \n",
    " # fetching the top-k nodes by embedding similarity.\n",
    " mode=\"embedding\",\n",
    " )\n",
    " print(f\"Token used: {llm_predictor.last_token_usage}, total used: {llm_predictor.total_tokens_used}\")\n",
    " return result\n",
    "\n",
    "display_response(chat(\"这本书讲了什么?\"))"
    "# This does not work yet; please don't use it.\n",
    "# Bug: https://github.com/jerryjliu/llama_index/issues/831\n",
    "def chat_stream(query):\n",
    " return index.query(\n",
    " query,\n",
    " llm_predictor=llm_predictor_stream,\n",
    " text_qa_template=QUESTION_ANSWER_PROMPT,\n",
    " response_mode=\"tree_summarize\",\n",
    " similarity_top_k=3,\n",
    " streaming=True,\n",
    " mode=\"embedding\",\n",
    " )"
    ],
    "metadata": {
    "id": "6ddjxclno8tg"
    },
    "execution_count": 28,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "resp = chat(\"这本书讲了什么?\")\n",
    "display_response(resp)"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 427
    "height": 410
    },
    "id": "6ddjxclno8tg",
    "outputId": "254939dd-b48d-442f-aef1-8e13b5a13c99"
    "id": "psvZXfWirq31",
    "outputId": "16c58a16-c570-4b6a-c6cf-8552ddbf8828"
    },
    "execution_count": 8,
    "execution_count": 29,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Token used: 10843, total used: 10843\n"
    ]
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 这本书叫做《周灭商与华夏新生》,主要讲述了商周之变的历史转折,以及商文化与周文化的不同之处。它是一部关于古代中国思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则的历史著作,讲述了商朝的祭祀与战争为何有如此紧密的联系,以及殷周之变是如何发生的。它还设立了一个出发点:凡对古典中国思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则有兴趣的研究者或普通读者,可以先从这本书开始你的探索"
    "text/markdown": "**`Final Response:`** 这本书主要讲述了商周之变的历史背景和周朝的兴起,以及商文化和周文化的差异。它还提到了一些关于人祭习俗的历史知识,并分享了作者的研究经历和认知"
    },
    "metadata": {}
    },
    @@ -305,20 +363,27 @@
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 509
    "height": 544
    },
    "id": "bS5LAJkuqR4U",
    "outputId": "b0431e12-55d0-41ce-b84f-43fe88666c6e"
    "outputId": "18515036-427e-4e63-fd10-2eda66aca4ce"
    },
    "execution_count": 9,
    "execution_count": 31,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Token used: 14013, total used: 37002\n"
    ]
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 根据文本描述,牧野之战开始时,武王的军队面对着数量远远超过自己的商军阵列,而且他们没有内应,没有商人助战,所以处于两难的困境。武王派出他的岳父兼老师和战略阴谋家吕尚率步兵前往敌阵,自己则带着他的三百辆战车冲向商军阵列,吸引敌军注意力。商军阵列突然自行解体,变成了互相砍杀的人群。接着,西土联军全部投入了混战。最终,商军队伍溃散,武王取得了胜利"
    "text/markdown": "**`Final Response:`** 在牧野之战开始时,武王率领西土联军面对着数量远超自己的商军。武王的前提是有殷都内部联络人的密约,但局势不断变化,没有商人助战,西土联军将被一边倒地屠杀。武王没有别的选择,他只能相信父亲描述的那位上帝站在自己一边,只要全心信任他,父亲开启的翦商事业就能成功。在战斗开始时,武王一方没有任何章法和战术可言,但商军阵列却突然自行解体,变成了互相砍杀的人群。或许是看到周军义无反顾的冲锋,商军中的密谋者终于鼓起勇气,倒戈杀向纣王中军。接着,西土联军全部投入了混战。后世的周人史诗说,“商庶若化\",即是说,商军队伍就像滚水冲刷的油脂,瞬间溃散,融化。最终,武王率领的西土联军获胜,商王朝终结"
    },
    "metadata": {}
    },
    @@ -417,25 +482,32 @@
    {
    "cell_type": "code",
    "source": [
    "display_response(chat(\"人祭有几种情况\"))"
    "display_response(chat(\"被商朝献祭的人群中,分别都有哪些角色\"))"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 421
    "height": 433
    },
    "id": "7c1Kwsj2waEB",
    "outputId": "da748c8d-3a13-43cb-881a-efd347088018"
    "outputId": "a5383960-998d-4876-c3d0-141f5aeb71ff"
    },
    "execution_count": null,
    "execution_count": 33,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Token used: 12048, total used: 60520\n"
    ]
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 人祭有两种情况,一种是有蓄意虐杀的迹象,献祭者会尽量延缓人牲的死亡,任凭被剁去肢体的人牲尽量地挣扎、哀嚎或咒骂;另一种是例行的祭祀,随意性更大"
    "text/markdown": "**`Final Response:`** 在商朝献祭的人群中,有征伐周边斩获的首级的侯来、陈本等人,以及现场屠宰的牲畜,牛“五百有四”头,猪、羊等牲畜共“三千七百有一”头。其中还向天(上帝)和后稷献祭,以及向其他百神、水土之神献祭。其中有王族和方伯,以及纣王自己"
    },
    "metadata": {}
    },
    @@ -465,7 +537,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8310803169491916<br>**Text:** 祭坑留有蓄意虐杀的迹象,尤其当人牲数量不足,献祭者还会尽量延 \n缓人牲的死亡,任凭被剁去肢体的人牲尽量地挣扎、哀嚎或咒骂。这 \n种心态,跟观看古罗马的角斗士表演有相似之处。\n\n\f\n第二十一章殷都...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.850064718034232<br>**Text:** 仪式上,首先奉献的是侯来、陈本等征伐周边斩获的首级,并 搭配现场屠宰的牲畜,“断牛六,断羊二”;然后向天(上帝)和后 稷献祭,用的是牛“五百有四”头;再向其他百神、水土之神献祭, 用猪、羊等牲畜...<br>"
    },
    "metadata": {}
    },
    @@ -495,7 +567,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8285896369748251<br>**Text:** :祭祀坑中的无头尸身,往往连带着下顎甚至上顎骨,说明\n每年例行的祭祀的随意性更大\n\n殷商的王陵祭祀对男性人牲和殉人多用斩首,甚至肢解,而女性\n则多能保存全尸。这背后的宗教思维可能是:男性俘虏和...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8454964094383671<br>**Text:** 之内,中土世界天翻地覆\n\n纣王焚身而死,后世人大都将其理解为一种走投无路的自绝。其 \n实,按照商人的宗教理念,这是一场最高级的献祭--王把自己奉献 \n给了上帝和祖宗诸神。商朝开国之王成汤(天乙...<br>"
    },
    "metadata": {}
    },
    @@ -525,7 +597,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8259583785233079<br>**Text:** 仪式上,首先奉献的是侯来、陈本等征伐周边斩获的首级,并 搭配现场屠宰的牲畜,“断牛六,断羊二”;然后向天(上帝)和后 稷献祭,用的是牛“五百有四”头;再向其他百神、水土之神献祭, 用猪、羊等牲畜...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8425104275046603<br>**Text:** 郭宝钧:《1950年春殷墟发掘报告》,第45页。\n20王平、顾彬:《甲骨文与殷商人祭》,大象出版社,2007年,第88、97页。\n21唐际根、汤毓赞:《再论殷墟人祭坑与甲骨文中羌祭卜辞的相关性》...<br>"
    },
    "metadata": {}
    }
  3. @ninehills ninehills revised this gist Mar 25, 2023. 1 changed file with 176 additions and 68 deletions.
    244 changes: 176 additions & 68 deletions chatpdf-zh.ipynb
    Original file line number Diff line number Diff line change
    @@ -3,10 +3,7 @@
    "nbformat_minor": 0,
    "metadata": {
    "colab": {
    "provenance": [],
    "mount_file_id": "1IVBrqqDZiGRwldBJ_SsPtkyEpVJ-diDI",
    "authorship_tag": "ABX9TyN/jC5e6yzstKUrokalMTiU",
    "include_colab_link": true
    "provenance": []
    },
    "kernelspec": {
    "name": "python3",
    @@ -17,16 +14,6 @@
    }
    },
    "cells": [
    {
    "cell_type": "markdown",
    "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
    },
    "source": [
    "<a href=\"https://colab.research.google.com/gist/ninehills/ecf7107574c83016e8b68965bf9a51c4/chatpdf-zh.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
    ]
    },
    {
    "cell_type": "markdown",
    "source": [
    @@ -51,7 +38,7 @@
    },
    "outputId": "3c50322c-5eef-43e1-f4a5-34084831e52e"
    },
    "execution_count": 4,
    "execution_count": null,
    "outputs": [
    {
    "output_type": "stream",
    @@ -72,7 +59,7 @@
    "metadata": {
    "id": "UXw1TWw_nj_F"
    },
    "execution_count": 10,
    "execution_count": 1,
    "outputs": []
    },
    {
    @@ -87,66 +74,49 @@
    "metadata": {
    "id": "Aqef8N2RlUpo"
    },
    "execution_count": 5,
    "execution_count": 2,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "# Load environment variables, Just create a .env file with your OPENAI_API_KEY then load it.\n",
    "\n",
    "import os \n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "# API configuration\n",
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")"
    "from llama_index import GPTSimpleVectorIndex, LLMPredictor, PromptHelper\n",
    "from llama_index.response.notebook_utils import display_response\n",
    "from llama_index.prompts.prompts import QuestionAnswerPrompt\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from IPython.display import Markdown, display"
    ],
    "metadata": {
    "id": "WKoA2bzul7Gz"
    "id": "Vp6JcErhmt_w"
    },
    "execution_count": 6,
    "execution_count": 5,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "# Load pdf to documents\n",
    "# Load environment variables, Just create a .env file with your OPENAI_API_KEY then load it.\n",
    "\n",
    "from pathlib import Path\n",
    "from llama_index import download_loader\n",
    "import os \n",
    "from dotenv import load_dotenv\n",
    "\n",
    "CJKPDFReader = download_loader(\"CJKPDFReader\")\n",
    "load_dotenv()\n",
    "\n",
    "loader = CJKPDFReader()\n",
    "documents = loader.load_data(file=os.path.join(Path(WORK_DIR), Path(SRC_FILE)))"
    ],
    "metadata": {
    "id": "q9Pp1AZSkarS"
    },
    "execution_count": 7,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "from llama_index import GPTSimpleVectorIndex, LLMPredictor, PromptHelper\n",
    "from llama_index.response.notebook_utils import display_response\n",
    "from llama_index.prompts.prompts import QuestionAnswerPrompt\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from IPython.display import Markdown, display"
    "# API configuration\n",
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")"
    ],
    "metadata": {
    "id": "Vp6JcErhmt_w"
    "id": "WKoA2bzul7Gz"
    },
    "execution_count": 15,
    "execution_count": 3,
    "outputs": []
    },
    {
    "cell_type": "markdown",
    "source": [
    "准备 Index 文件,为了避免重复索引,增加缓存"
    "准备 Index 文件,为了避免重复索引,增加缓存\n",
    "\n",
    "\n"
    ],
    "metadata": {
    "id": "SApFHwHCpEGJ"
    @@ -155,9 +125,18 @@
    {
    "cell_type": "code",
    "source": [
    "# Load pdf to documents\n",
    "\n",
    "from pathlib import Path\n",
    "from llama_index import download_loader\n",
    "\n",
    "CJKPDFReader = download_loader(\"CJKPDFReader\")\n",
    "\n",
    "loader = CJKPDFReader()\n",
    "index_file = os.path.join(Path(WORK_DIR), Path(INDEX_FILE))\n",
    "\n",
    "if os.path.exists(index_file) == False:\n",
    " documents = loader.load_data(file=os.path.join(Path(WORK_DIR), Path(SRC_FILE)))\n",
    " index = GPTSimpleVectorIndex(documents)\n",
    " index.save_to_disk(index_file)\n",
    "else:\n",
    @@ -166,7 +145,7 @@
    "metadata": {
    "id": "Cb98YMtrnTxU"
    },
    "execution_count": 14,
    "execution_count": 7,
    "outputs": []
    },
    {
    @@ -181,7 +160,19 @@
    " \"\\n---------------------\\n\"\n",
    " \"{query_str}\\n\"\n",
    ")\n",
    "QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)\n",
    "\n",
    "QUESTION_ANSWER_PROMPT_TMPL_2 = \"\"\"\n",
    "You are an AI assistant providing helpful advice. You are given the following extracted parts of a long document and a question. Provide a conversational answer based on the context provided.\n",
    "If you can't find the answer in the context below, just say \"Hmm, I'm not sure.\" Don't try to make up an answer.\n",
    "If the question is not related to the context, politely respond that you are tuned to only answer questions that are related to the context.\n",
    "Context information is below.\n",
    "=========\n",
    "{context_str}\n",
    "=========\n",
    "{query_str}\n",
    "\"\"\"\n",
    "\n",
    "QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL_2)\n",
    "\n",
    "def chat(query):\n",
    " return index.query(\n",
    @@ -197,20 +188,20 @@
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 409
    "height": 427
    },
    "id": "6ddjxclno8tg",
    "outputId": "3b1fd4b2-a5e7-4ed1-ba22-bbefc1d0ea56"
    "outputId": "254939dd-b48d-442f-aef1-8e13b5a13c99"
    },
    "execution_count": 22,
    "execution_count": 8,
    "outputs": [
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 这本书叫做《翦商:殷周之变与华夏新生》,主要讲述了商周之际的历史变迁,特别是周公废除人祭的历史转折,以及商周文化的差异。作者从历史、考古、文献等多个角度出发,探讨了商周时期的人祭习俗、周公的作用、商周文化的差异等问题。此外,该书还设立了一个出发点,让对古典中国思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则有兴趣的研究者或普通读者,可以从这本书开始探索"
    "text/markdown": "**`Final Response:`** 这本书叫做《周灭商与华夏新生》,主要讲述了商周之变的历史转折,以及商文化与周文化的不同之处。它是一部关于古代中国思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则的历史著作,讲述了商朝的祭祀与战争为何有如此紧密的联系,以及殷周之变是如何发生的。它还设立了一个出发点:凡对古典中国思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则有兴趣的研究者或普通读者,可以先从这本书开始你的探索"
    },
    "metadata": {}
    },
    @@ -240,7 +231,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8000716454899834<br>**Text:** 如《象传》和《彖传》可能是周公作品。其他篇章里常出现“子日”,孔子 \n自己肯定不会这样写,它们应当是孔门弟子编写的。《周易》经传的详细知识, \n可参考廖明春《周易经传十五讲》,北京大学出版社,2...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8000785245735972<br>**Text:** 如《象传》和《彖传》可能是周公作品。其他篇章里常出现“子日”,孔子 \n自己肯定不会这样写,它们应当是孔门弟子编写的。《周易》经传的详细知识, \n可参考廖明春《周易经传十五讲》,北京大学出版社,2...<br>"
    },
    "metadata": {}
    },
    @@ -270,7 +261,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7965443968610987<br>**Text:** 的支持,其实是心理上的,让我意识到除了祭祀坑里的尸骨,这世界 上还有别的东西。 也许,人不应当凝视深渊;虽然深渊就在那里。 \f \f 始于一页,抵达世界 Humanities ■ Histor...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7965259185103453<br>**Text:** 的支持,其实是心理上的,让我意识到除了祭祀坑里的尸骨,这世界 上还有别的东西。 也许,人不应当凝视深渊;虽然深渊就在那里。 \f \f 始于一页,抵达世界 Humanities ■ Histor...<br>"
    },
    "metadata": {}
    },
    @@ -300,7 +291,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7947832308214735<br>**Text:** 书》则是“太姒梦见商之庭产棘”。此事应载于《逸周书•程寤》篇,但传 \n世本只存篇名,正文缺。参见黄怀信等《逸周书汇校集注》(修订本),上海 \n古籍出版社,2007年,第262、1141页;李学勤...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7947722920317227<br>**Text:** 书》则是“太姒梦见商之庭产棘”。此事应载于《逸周书•程寤》篇,但传 \n世本只存篇名,正文缺。参见黄怀信等《逸周书汇校集注》(修订本),上海 \n古籍出版社,2007年,第262、1141页;李学勤...<br>"
    },
    "metadata": {}
    }
    @@ -314,20 +305,137 @@
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 544
    "height": 509
    },
    "id": "bS5LAJkuqR4U",
    "outputId": "cd066f66-743a-476d-ec22-1b1adb10c2e6"
    "outputId": "b0431e12-55d0-41ce-b84f-43fe88666c6e"
    },
    "execution_count": 9,
    "outputs": [
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 根据文本描述,牧野之战开始时,武王的军队面对着数量远远超过自己的商军阵列,而且他们没有内应,没有商人助战,所以处于两难的困境。武王派出他的岳父兼老师和战略阴谋家吕尚率步兵前往敌阵,自己则带着他的三百辆战车冲向商军阵列,吸引敌军注意力。商军阵列突然自行解体,变成了互相砍杀的人群。接着,西土联军全部投入了混战。最终,商军队伍溃散,武王取得了胜利。"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 1/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7968888490270644<br>**Text:** 王受命第十一年,5他再度起兵东征。有好几种文献记载武王此次伐\n商的行军日程,但年份和月份皆有所不同。总的来说,武王此次起兵\n是在隆冬季节,决战则是在冬末春初。\n\n总攻的前期工作在前一年底就开始了...<br>"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 2/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7918882416293495<br>**Text:** 并不是武王的私人属下。他们很在意这种身份区别。7\n\n天色渐明,雨势渐小,对面的商军阵列逐渐成形。周人史诗的描\n述是,敌军的戈矛像森林一样密集,所谓“殷商之旅,其会如林”。(《诗\n经•大雅•大明》...<br>"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 3/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7853722213787273<br>**Text:** 作的“金花”。这都是王室才会有的财物,看来王室和奴隶们居住\n的地方相隔并不远。\n\n\f\n第十一章商人的思维与国家\n\n225\n\n需要注意的是,只有殷墟王宫区发现有大量集中存放的石头农具,\n其他任何商...<br>"
    },
    "metadata": {}
    }
    ]
    },
    {
    "cell_type": "code",
    "source": [
    "display_response(chat(\"人祭有几种情况?\"))"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 421
    },
    "id": "7c1Kwsj2waEB",
    "outputId": "da748c8d-3a13-43cb-881a-efd347088018"
    },
    "execution_count": 23,
    "execution_count": null,
    "outputs": [
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 牧野之战是周武王率领西土联军与商朝军队在牧野展开的一场决定性战役。商军总数为七十万人,而西土联军只有轻微的损失。在甲子凌晨,规模较小的周军首先列队完毕,武王全身盔甲戎装,在阵前宣誓,这便是著名的《尚书•牧誓》。紧接着,武王一一点名麾下的盟友、将领、军官,直到“百夫长”,命令他们:“拿起你们的戈,连接好你们的盾牌,立起你们的长矛,现在。”商军阵列却突然自行解体,变成了互相砍杀的人群。或许是看到周军义无反顾的冲锋,商军中的密谋者终于鼓起勇气,倒戈杀向纣王中军。接着,西土联军全部投入了混战。最终,商军队伍就像滚水冲刷的油脂,瞬间溃散,融化"
    "text/markdown": "**`Final Response:`** 人祭有两种情况,一种是有蓄意虐杀的迹象,献祭者会尽量延缓人牲的死亡,任凭被剁去肢体的人牲尽量地挣扎、哀嚎或咒骂;另一种是例行的祭祀,随意性更大"
    },
    "metadata": {}
    },
    @@ -357,7 +465,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7969439158483032<br>**Text:** 王受命第十一年,5他再度起兵东征。有好几种文献记载武王此次伐\n商的行军日程,但年份和月份皆有所不同。总的来说,武王此次起兵\n是在隆冬季节,决战则是在冬末春初\n\n总攻的前期工作在前一年底就开始了...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8310803169491916<br>**Text:** 祭坑留有蓄意虐杀的迹象,尤其当人牲数量不足,献祭者还会尽量延 \n缓人牲的死亡,任凭被剁去肢体的人牲尽量地挣扎、哀嚎或咒骂。这 \n种心态,跟观看古罗马的角斗士表演有相似之处\n\n\f\n第二十一章殷都...<br>"
    },
    "metadata": {}
    },
    @@ -387,7 +495,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7919791150839193<br>**Text:** 并不是武王的私人属下。他们很在意这种身份区别。7\n\n天色渐明,雨势渐小,对面的商军阵列逐渐成形。周人史诗的描\n述是,敌军的戈矛像森林一样密集,所谓“殷商之旅,其会如林”。(《诗\n经•大雅•大明》...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8285896369748251<br>**Text:** :祭祀坑中的无头尸身,往往连带着下顎甚至上顎骨,说明\n每年例行的祭祀的随意性更大。\n\n殷商的王陵祭祀对男性人牲和殉人多用斩首,甚至肢解,而女性\n则多能保存全尸。这背后的宗教思维可能是:男性俘虏和...<br>"
    },
    "metadata": {}
    },
    @@ -417,7 +525,7 @@
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7854334012776085<br>**Text:** 作的“金花”。这都是王室才会有的财物,看来王室和奴隶们居住\n的地方相隔并不远。\n\n\f\n第十一章商人的思维与国家\n\n225\n\n需要注意的是,只有殷墟王宫区发现有大量集中存放的石头农具,\n其他任何商...<br>"
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8259583785233079<br>**Text:** 仪式上,首先奉献的是侯来、陈本等征伐周边斩获的首级,并 搭配现场屠宰的牲畜,“断牛六,断羊二”;然后向天(上帝)和后 稷献祭,用的是牛“五百有四”头;再向其他百神、水土之神献祭, 用猪、羊等牲畜...<br>"
    },
    "metadata": {}
    }
  4. @ninehills ninehills revised this gist Mar 25, 2023. 1 changed file with 12 additions and 1 deletion.
    13 changes: 12 additions & 1 deletion chatpdf-zh.ipynb
    Original file line number Diff line number Diff line change
    @@ -5,7 +5,8 @@
    "colab": {
    "provenance": [],
    "mount_file_id": "1IVBrqqDZiGRwldBJ_SsPtkyEpVJ-diDI",
    "authorship_tag": "ABX9TyN/jC5e6yzstKUrokalMTiU"
    "authorship_tag": "ABX9TyN/jC5e6yzstKUrokalMTiU",
    "include_colab_link": true
    },
    "kernelspec": {
    "name": "python3",
    @@ -16,6 +17,16 @@
    }
    },
    "cells": [
    {
    "cell_type": "markdown",
    "metadata": {
    "id": "view-in-github",
    "colab_type": "text"
    },
    "source": [
    "<a href=\"https://colab.research.google.com/gist/ninehills/ecf7107574c83016e8b68965bf9a51c4/chatpdf-zh.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
    ]
    },
    {
    "cell_type": "markdown",
    "source": [
  5. @ninehills ninehills created this gist Mar 25, 2023.
    416 changes: 416 additions & 0 deletions chatpdf-zh.ipynb
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,416 @@
    {
    "nbformat": 4,
    "nbformat_minor": 0,
    "metadata": {
    "colab": {
    "provenance": [],
    "mount_file_id": "1IVBrqqDZiGRwldBJ_SsPtkyEpVJ-diDI",
    "authorship_tag": "ABX9TyN/jC5e6yzstKUrokalMTiU"
    },
    "kernelspec": {
    "name": "python3",
    "display_name": "Python 3"
    },
    "language_info": {
    "name": "python"
    }
    },
    "cells": [
    {
    "cell_type": "markdown",
    "source": [
    "# Chat with a PDF file"
    ],
    "metadata": {
    "id": "4Sw_ysmQlk-8"
    }
    },
    {
    "cell_type": "code",
    "source": [
    "# 建议将 PDF 文件保存在 Google Drive 上,左侧 Connect to Google Drive\n",
    "\n",
    "from google.colab import drive\n",
    "drive.mount('/content/drive')"
    ],
    "metadata": {
    "id": "WKhC2AZRjyok",
    "colab": {
    "base_uri": "https://localhost:8080/"
    },
    "outputId": "3c50322c-5eef-43e1-f4a5-34084831e52e"
    },
    "execution_count": 4,
    "outputs": [
    {
    "output_type": "stream",
    "name": "stdout",
    "text": [
    "Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
    ]
    }
    ]
    },
    {
    "cell_type": "code",
    "source": [
    "WORK_DIR = \"/content/drive/MyDrive/ChatGPT/Notebooks/ChatPDF/\"\n",
    "SRC_FILE = \"jianshang.pdf\"\n",
    "INDEX_FILE = \"jianshang.index\""
    ],
    "metadata": {
    "id": "UXw1TWw_nj_F"
    },
    "execution_count": 10,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "%%capture\n",
    "# update or install the necessary libraries\n",
    "!pip install --upgrade llama_index\n",
    "!pip install --upgrade langchain\n",
    "!pip install --upgrade python-dotenv\n"
    ],
    "metadata": {
    "id": "Aqef8N2RlUpo"
    },
    "execution_count": 5,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "# Load environment variables: create a .env file containing your OPENAI_API_KEY, then load it.\n",
    "\n",
    "import os \n",
    "from dotenv import load_dotenv\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "# API configuration\n",
    "OPENAI_API_KEY = os.getenv(\"OPENAI_API_KEY\")"
    ],
    "metadata": {
    "id": "WKoA2bzul7Gz"
    },
    "execution_count": 6,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "# Load the PDF into documents\n",
    "\n",
    "from pathlib import Path\n",
    "from llama_index import download_loader\n",
    "\n",
    "CJKPDFReader = download_loader(\"CJKPDFReader\")\n",
    "\n",
    "loader = CJKPDFReader()\n",
    "documents = loader.load_data(file=os.path.join(Path(WORK_DIR), Path(SRC_FILE)))"
    ],
    "metadata": {
    "id": "q9Pp1AZSkarS"
    },
    "execution_count": 7,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "from llama_index import GPTSimpleVectorIndex, LLMPredictor, PromptHelper\n",
    "from llama_index.response.notebook_utils import display_response\n",
    "from llama_index.prompts.prompts import QuestionAnswerPrompt\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from IPython.display import Markdown, display"
    ],
    "metadata": {
    "id": "Vp6JcErhmt_w"
    },
    "execution_count": 15,
    "outputs": []
    },
    {
    "cell_type": "markdown",
    "source": [
    "准备 Index 文件,为了避免重复索引,增加缓存"
    ],
    "metadata": {
    "id": "SApFHwHCpEGJ"
    }
    },
    {
    "cell_type": "code",
    "source": [
    "index_file = os.path.join(Path(WORK_DIR), Path(INDEX_FILE))\n",
    "\n",
    "if os.path.exists(index_file) == False:\n",
    " index = GPTSimpleVectorIndex(documents)\n",
    " index.save_to_disk(index_file)\n",
    "else:\n",
    " index = GPTSimpleVectorIndex.load_from_disk(index_file)\n"
    ],
    "metadata": {
    "id": "Cb98YMtrnTxU"
    },
    "execution_count": 14,
    "outputs": []
    },
    {
    "cell_type": "code",
    "source": [
    "llm_predictor = LLMPredictor(llm=ChatOpenAI(temperature=0.2, model_name=\"gpt-3.5-turbo\"))\n",
    "\n",
    "QUESTION_ANSWER_PROMPT_TMPL = (\n",
    " \"Context information is below. \\n\"\n",
    " \"---------------------\\n\"\n",
    " \"{context_str}\"\n",
    " \"\\n---------------------\\n\"\n",
    " \"{query_str}\\n\"\n",
    ")\n",
    "QUESTION_ANSWER_PROMPT = QuestionAnswerPrompt(QUESTION_ANSWER_PROMPT_TMPL)\n",
    "\n",
    "def chat(query):\n",
    " return index.query(\n",
    " query,\n",
    " llm_predictor=llm_predictor,\n",
    " text_qa_template=QUESTION_ANSWER_PROMPT,\n",
    " response_mode=\"tree_summarize\",\n",
    " similarity_top_k=3,\n",
    " )\n",
    "\n",
    "display_response(chat(\"这本书讲了什么?\"))"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 409
    },
    "id": "6ddjxclno8tg",
    "outputId": "3b1fd4b2-a5e7-4ed1-ba22-bbefc1d0ea56"
    },
    "execution_count": 22,
    "outputs": [
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 这本书叫做《翦商:殷周之变与华夏新生》,主要讲述了商周之际的历史变迁,特别是周公废除人祭的历史转折,以及商周文化的差异。作者从历史、考古、文献等多个角度出发,探讨了商周时期的人祭习俗、周公的作用、商周文化的差异等问题。此外,该书还设立了一个出发点,让对古典中国思想、信仰、伦理、心态、风俗,以及军事、政治、制度、规则有兴趣的研究者或普通读者,可以从这本书开始探索。"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 1/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.8000716454899834<br>**Text:** 如《象传》和《彖传》可能是周公作品。其他篇章里常出现“子日”,孔子 \n自己肯定不会这样写,它们应当是孔门弟子编写的。《周易》经传的详细知识, \n可参考廖明春《周易经传十五讲》,北京大学出版社,2...<br>"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 2/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7965443968610987<br>**Text:** 的支持,其实是心理上的,让我意识到除了祭祀坑里的尸骨,这世界 上还有别的东西。 也许,人不应当凝视深渊;虽然深渊就在那里。 \f \f 始于一页,抵达世界 Humanities ■ Histor...<br>"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 3/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7947832308214735<br>**Text:** 书》则是“太姒梦见商之庭产棘”。此事应载于《逸周书•程寤》篇,但传 \n世本只存篇名,正文缺。参见黄怀信等《逸周书汇校集注》(修订本),上海 \n古籍出版社,2007年,第262、1141页;李学勤...<br>"
    },
    "metadata": {}
    }
    ]
    },
    {
    "cell_type": "code",
    "source": [
    "display_response(chat(\"牧野之战的具体过程是什么?\"))"
    ],
    "metadata": {
    "colab": {
    "base_uri": "https://localhost:8080/",
    "height": 544
    },
    "id": "bS5LAJkuqR4U",
    "outputId": "cd066f66-743a-476d-ec22-1b1adb10c2e6"
    },
    "execution_count": 23,
    "outputs": [
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Final Response:`** 牧野之战是周武王率领西土联军与商朝军队在牧野展开的一场决定性战役。商军总数为七十万人,而西土联军只有轻微的损失。在甲子凌晨,规模较小的周军首先列队完毕,武王全身盔甲戎装,在阵前宣誓,这便是著名的《尚书•牧誓》。紧接着,武王一一点名麾下的盟友、将领、军官,直到“百夫长”,命令他们:“拿起你们的戈,连接好你们的盾牌,立起你们的长矛,现在。”商军阵列却突然自行解体,变成了互相砍杀的人群。或许是看到周军义无反顾的冲锋,商军中的密谋者终于鼓起勇气,倒戈杀向纣王中军。接着,西土联军全部投入了混战。最终,商军队伍就像滚水冲刷的油脂,瞬间溃散,融化。"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 1/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7969439158483032<br>**Text:** 王受命第十一年,5他再度起兵东征。有好几种文献记载武王此次伐\n商的行军日程,但年份和月份皆有所不同。总的来说,武王此次起兵\n是在隆冬季节,决战则是在冬末春初。\n\n总攻的前期工作在前一年底就开始了...<br>"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 2/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7919791150839193<br>**Text:** 并不是武王的私人属下。他们很在意这种身份区别。7\n\n天色渐明,雨势渐小,对面的商军阵列逐渐成形。周人史诗的描\n述是,敌军的戈矛像森林一样密集,所谓“殷商之旅,其会如林”。(《诗\n经•大雅•大明》...<br>"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "---"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**`Source Node 3/3`**"
    },
    "metadata": {}
    },
    {
    "output_type": "display_data",
    "data": {
    "text/plain": [
    "<IPython.core.display.Markdown object>"
    ],
    "text/markdown": "**Document ID:** 1f43ae29-e41d-474b-b9c7-2afcb36d0d0b<br>**Similarity:** 0.7854334012776085<br>**Text:** 作的“金花”。这都是王室才会有的财物,看来王室和奴隶们居住\n的地方相隔并不远。\n\n\f\n第十一章商人的思维与国家\n\n225\n\n需要注意的是,只有殷墟王宫区发现有大量集中存放的石头农具,\n其他任何商...<br>"
    },
    "metadata": {}
    }
    ]
    }
    ]
    }