Spaces:

llm-wizard
/

ArxivChainLitDemo

Paused

App Files Files Community

CSAle committed on Jun 8, 2023

Commit

f25b2b3

0 Parent(s):

Adding Initial App

Browse files

Files changed (7) hide show

.chainlit/.langchain.db +0 -0
.chainlit/config.toml +29 -0
.gitignore +4 -0
Dockerfile +7 -0
app.py +128 -0
chainlit.md +11 -0
requirements.txt +6 -0

.chainlit/.langchain.db ADDED Viewed

Binary file (12.3 kB). View file

.chainlit/config.toml ADDED Viewed

	@@ -0,0 +1,29 @@

+[project]
+# Name of the app and chatbot.
+name = "Arxiv Chatbot"
+# Description of the app and chatbot. This is used for HTML tags.
+# description = ""
+# If true (default), the app will be available to anonymous users (once deployed).
+# If false, users will need to authenticate and be part of the project to use the app.
+public = true
+# The project ID (found on https://cloud.chainlit.io).
+# If provided, all the message data will be stored in the cloud.
+# The project ID is required when public is set to false.
+#id = ""
+# Whether to enable telemetry (default: true). No personal data is collected.
+enable_telemetry = false
+# List of environment variables to be provided by each user to use the app.
+user_env = ["OPENAI_API_KEY"]
+# Hide the chain of thought details from the user in the UI.
+hide_cot = false
+# Link to your github repo. This will add a github button in the UI's header.
+# github = ""
+# Limit the number of requests per user.
+#request_limit = "10 per day"

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+.env
+.vscode
+.chroma
+__pycache__

Dockerfile ADDED Viewed

	@@ -0,0 +1,7 @@

+# Base image: the tag must be qualified with the `python` repository name,
+# otherwise the tag string itself is looked up as an image name.
+FROM python:3.8.17-alpine3.18
+# copy the requirements.txt file first to avoid cache invalidations
+COPY requirements.txt /app/requirements.txt
+WORKDIR /app
+RUN pip install -r requirements.txt
+COPY . /app
+# The chainlit CLI starts an app through its `run` subcommand.
+CMD ["chainlit", "run", "app.py"]

app.py ADDED Viewed

	@@ -0,0 +1,128 @@

+from langchain.embeddings.openai import OpenAIEmbeddings
+from langchain.document_loaders import PyMuPDFLoader
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import Chroma
+from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.chat_models import ChatOpenAI
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
+import os
+import arxiv
+import chainlit as cl
+from chainlit import user_session
+# Values stored under "env" in the chainlit user session; init() reads
+# OPENAI_API_KEY from this mapping.
+# NOTE(review): this lookup runs once at import time, not per chat session --
+# confirm a user session already exists at that point.
+user_env = user_session.get("env")
+# System prompt for the QA chain. `{summaries}` is the placeholder for the
+# retrieved document text; the model is told to always end with a SOURCES
+# section (title, page number, URL).
+system_template = """Use the following pieces of context to answer the users question.
+If you don't know the answer, just say that you don't know, don't try to make up an answer.
+ALWAYS return a "SOURCES" part in your answer.
+The "SOURCES" part should be a reference to the source of the document from which you got your answer.
+Example of your response should be:
+```
+The answer is foo
+SOURCES:
+Title: xyz
+Page Number: 1
+URL: https://arxiv.org/abs/X.Y.Z
+```
+Begin!
+----------------
+{summaries}"""
+# Chat prompt layout: the system instructions first, then the user's question.
+messages = [
+    SystemMessagePromptTemplate.from_template(system_template),
+    HumanMessagePromptTemplate.from_template("{question}"),
+]
+prompt = ChatPromptTemplate.from_messages(messages)
+# Keyword arguments meant for the chain constructor so that it uses `prompt`.
+chain_type_kwargs = {"prompt": prompt}
+@cl.langchain_factory
+def init():
+    arxiv_query = None
+    # Wait for the user to ask an Arxiv question
+    while arxiv_query == None:
+        arxiv_query = cl.AskUserMessage(
+            content="Please enter a topic to begin!", timeout=15
+        ).send()
+    # Obtain the top 30 results from Arxiv for the query
+    search = arxiv.Search(
+        query=arxiv_query["content"],
+        max_results=30,
+        sort_by=arxiv.SortCriterion.Relevance,
+    )
+    # download each of the pdfs
+    pdf_data = []
+    for result in search.results():
+        loader = PyMuPDFLoader(result.pdf_url)
+        loaded_pdf = loader.load()
+        for document in loaded_pdf:
+            document.metadata["source"] = result.entry_id
+            document.metadata["file_path"] = result.pdf_url
+            document.metadata["title"] = result.title
+            pdf_data.append(document)
+    # Create a Chroma vector store
+    embeddings = OpenAIEmbeddings(disallowed_special=())
+    docsearch = Chroma.from_documents(pdf_data, embeddings)
+    # Create a chain that uses the Chroma vector store
+    chain = RetrievalQAWithSourcesChain.from_chain_type(
+        ChatOpenAI(
+            model_name="gpt-4",
+            temperature=0,
+            openai_api_key=user_env.get("OPENAI_API_KEY"),
+        ),
+        chain_type="stuff",
+        retriever=docsearch.as_retriever(),
+        return_source_documents=True,
+    )
+    # Let the user know that the system is ready
+    cl.Message(
+        content=f"We found a few papers about `{arxiv_query['content']}` you can now ask questions!"
+    ).send()
+    return chain
+@cl.langchain_postprocess
+def process_response(res):
+    answer = res["answer"]
+    source_elements_dict = {}
+    source_elements = []
+    for idx, source in enumerate(res["source_documents"]):
+        title = source.metadata["title"]
+        if title not in source_elements_dict:
+            source_elements_dict[title] = {
+                "page_number": [source.metadata["page"]],
+                "url": source.metadata["file_path"],
+            }
+        else:
+            source_elements_dict[title]["page_number"].append(source.metadata["page"])
+        # sort the page numbers
+        source_elements_dict[title]["page_number"].sort()
+    for title, source in source_elements_dict.items():
+        # create a string for the page numbers
+        page_numbers = ", ".join([str(x) for x in source["page_number"]])
+        text_for_source = f"Page Number(s): {page_numbers}\nURL: {source['url']}"
+        source_elements.append(
+            cl.Text(name=title, text=text_for_source, display="inline")
+        )
+    cl.Message(content=answer, elements=source_elements).send()

chainlit.md ADDED Viewed

	@@ -0,0 +1,11 @@

+# ⚠️ Warning ⚠️
+You will need an OpenAI API key with GPT-4 access to use this app, because it relies on GPT-4's large context size!
+# Welcome to AskArxiv powered by Chainlit!
+In this app, you'll be able to enter a topic and then ask questions about ~30 Arxiv papers on that topic!
+### Link To Demo
+[Hugging Face Space]()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+arxiv==1.4.7
+langchain==0.0.193
+chainlit
+openai
+chromadb
+tiktoken