research-assistant-youtube.py (created by @hwchase17, Nov 16, 2023)

    from langchain.chat_models import ChatOpenAI
    from langchain.prompts import ChatPromptTemplate
    from langchain.schema.output_parser import StrOutputParser
    import requests
    from bs4 import BeautifulSoup
    from langchain.schema.runnable import RunnablePassthrough, RunnableLambda
    from langchain.utilities import DuckDuckGoSearchAPIWrapper
    import json

    RESULTS_PER_QUESTION = 3

    ddg_search = DuckDuckGoSearchAPIWrapper()


def web_search(query: str, num_results: int = RESULTS_PER_QUESTION):
    results = ddg_search.results(query, num_results)
    return [r["link"] for r in results]
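
# Example usage (a minimal sketch; assumes the `duckduckgo-search` package is
# installed and that each result dict carries a "link" key):
# >>> web_search("what is LangSmith", num_results=2)
# ['https://example.com/a', 'https://example.com/b']  # hypothetical output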


    SUMMARY_TEMPLATE = """{text}
    -----------
    Using the above text, answer in short the following question:
    > {question}
    -----------
If the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats etc. if available."""  # noqa: E501
    SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)


def scrape_text(url: str):
    # Send a GET request to the webpage
    try:
        response = requests.get(url)

        # Check if the request was successful
        if response.status_code == 200:
            # Parse the content of the response with BeautifulSoup
            soup = BeautifulSoup(response.text, "html.parser")

            # Extract all text from the webpage
            page_text = soup.get_text(separator=" ", strip=True)

            # Return the extracted text
            return page_text
        else:
            return f"Failed to retrieve the webpage: Status code {response.status_code}"
    except Exception as e:
        print(e)
        return f"Failed to retrieve the webpage: {e}"


# Example URL for testing scrape_text (not consumed by the chains below)
url = "https://blog.langchain.dev/announcing-langsmith/"

scrape_and_summarize_chain = RunnablePassthrough.assign(
    summary=RunnablePassthrough.assign(
        text=lambda x: scrape_text(x["url"])[:10000]
    ) | SUMMARY_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser()
) | (lambda x: f"URL: {x['url']}\n\nSUMMARY: {x['summary']}")

web_search_chain = RunnablePassthrough.assign(
    urls=lambda x: web_search(x["question"])
) | (lambda x: [{"question": x["question"], "url": u} for u in x["urls"]]) | scrape_and_summarize_chain.map()
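
# Example invocation (a sketch; assumes OPENAI_API_KEY is set in the
# environment). `.map()` runs scrape_and_summarize_chain once per URL, so this
# returns one "URL: ...\n\nSUMMARY: ..." string per search result:
# >>> summaries = web_search_chain.invoke({"question": "what is LangSmith?"})
# >>> len(summaries)  # equals RESULTS_PER_QUESTION
# 3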

## Alternative: the same pipeline over Arxiv instead of web search

# from langchain.retrievers import ArxivRetriever
#
# retriever = ArxivRetriever()
# SUMMARY_TEMPLATE = """{doc}
#
# -----------
#
# Using the above text, answer in short the following question:
#
# > {question}
#
# -----------
# If the question cannot be answered using the text, simply summarize the text. Include all factual information, numbers, stats etc. if available."""  # noqa: E501
# SUMMARY_PROMPT = ChatPromptTemplate.from_template(SUMMARY_TEMPLATE)
#
#
# scrape_and_summarize_chain = RunnablePassthrough.assign(
#     summary=SUMMARY_PROMPT | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser()
# ) | (lambda x: f"Title: {x['doc'].metadata['Title']}\n\nSUMMARY: {x['summary']}")
#
# web_search_chain = RunnablePassthrough.assign(
#     docs=lambda x: retriever.get_summaries_as_docs(x["question"])
# ) | (lambda x: [{"question": x["question"], "doc": d} for d in x["docs"]]) | scrape_and_summarize_chain.map()



SEARCH_PROMPT = ChatPromptTemplate.from_messages(
    [
        (
            "user",
            "Write 3 google search queries to search online that form an "
            "objective opinion from the following: {question}\n"
            "You must respond with a list of strings in the following format: "
            '["query 1", "query 2", "query 3"].',
        ),
    ]
)

    search_question_chain = SEARCH_PROMPT | ChatOpenAI(temperature=0) | StrOutputParser() | json.loads

    full_research_chain = search_question_chain | (lambda x: [{"question": q} for q in x]) | web_search_chain.map()
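
# Example invocation (a sketch; assumes OPENAI_API_KEY is set). The search
# chain parses the model's JSON reply into a list of three query strings, and
# full_research_chain fans each query out through web_search_chain, yielding a
# list of lists of summaries:
# >>> search_question_chain.invoke({"question": "what is LangSmith?"})
# ['LangSmith overview', 'LangSmith features', 'LangSmith reviews']  # hypothetical
# >>> results = full_research_chain.invoke({"question": "what is LangSmith?"})
# >>> len(results), len(results[0])  # 3 queries x RESULTS_PER_QUESTION summaries
# (3, 3)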

    WRITER_SYSTEM_PROMPT = "You are an AI critical thinker research assistant. Your sole purpose is to write well written, critically acclaimed, objective and structured reports on given text." # noqa: E501


    # Report prompts from https://github.com/assafelovic/gpt-researcher/blob/master/gpt_researcher/master/prompts.py
    RESEARCH_REPORT_TEMPLATE = """Information:
    --------
    {research_summary}
    --------
    Using the above information, answer the following question or topic: "{question}" in a detailed report -- \
    The report should focus on the answer to the question, should be well structured, informative, \
    in depth, with facts and numbers if available and a minimum of 1,200 words.
    You should strive to write the report as long as you can using all relevant and necessary information provided.
    You must write the report with markdown syntax.
You MUST determine your own concrete and valid opinion based on the given information. Do NOT defer to general and meaningless conclusions.
    Write all used source urls at the end of the report, and make sure to not add duplicated sources, but only one reference for each.
    You must write the report in apa format.
    Please do your best, this is very important to my career.""" # noqa: E501

prompt = ChatPromptTemplate.from_messages(
    [
        ("system", WRITER_SYSTEM_PROMPT),
        ("user", RESEARCH_REPORT_TEMPLATE),
    ]
)

def collapse_list_of_lists(list_of_lists):
    # Join each inner list of summaries, then join the groups into one string
    content = []
    for l in list_of_lists:
        content.append("\n\n".join(l))
    return "\n\n".join(content)

chain = RunnablePassthrough.assign(
    research_summary=full_research_chain | collapse_list_of_lists
) | prompt | ChatOpenAI(model="gpt-3.5-turbo-1106") | StrOutputParser()
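
# Example end-to-end invocation (a sketch; assumes OPENAI_API_KEY is set and
# network access is available; a full run issues many model and HTTP calls):
# >>> report = chain.invoke({"question": "what is LangSmith and why do I need it?"})
# >>> print(report)  # a markdown research report with source URLs at the end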

## Serve the chain over HTTP with LangServe
    from fastapi import FastAPI
    from langserve import add_routes


app = FastAPI(
    title="LangChain Server",
    version="1.0",
    description="A simple API server using LangChain's Runnable interfaces",
)

add_routes(
    app,
    chain,
    path="/research-assistant",
)


if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="localhost", port=8000)
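
# Once the server is running, the chain can be called over HTTP (a sketch;
# LangServe exposes an /invoke endpoint under the given path that takes the
# chain input under an "input" key):
#
#   curl -X POST http://localhost:8000/research-assistant/invoke \
#     -H "Content-Type: application/json" \
#     -d '{"input": {"question": "what is LangSmith?"}}'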