shaolang committed on
Commit
0beb6cf
·
1 Parent(s): ab74abf

Initial implementation of multidoc qa

Browse files
Files changed (3) hide show
  1. README.md +2 -5
  2. app.py +46 -0
  3. requirements.txt +4 -0
README.md CHANGED
@@ -1,10 +1,7 @@
1
  ---
2
- title: Bllm W2a2 Multidoc Query
3
- emoji: 🦀
4
- colorFrom: gray
5
- colorTo: red
6
  sdk: gradio
7
- sdk_version: 3.32.0
8
  app_file: app.py
9
  pinned: false
10
  license: apache-2.0
 
1
  ---
2
+ title: Lewis Carroll's Alice in Wonderland and Through the Looking Glass
 
 
 
3
  sdk: gradio
4
+ sdk_version: 3.27.0
5
  app_file: app.py
6
  pinned: false
7
  license: apache-2.0
app.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from langchain.embeddings.openai import OpenAIEmbeddings
2
+ from langchain.document_loaders import GutenbergLoader
3
+ from langchain.vectorstores import Chroma
4
+ from langchain.text_splitter import CharacterTextSplitter
5
+ from langchain.chains.question_answering import load_qa_chain
6
+ from langchain.llms import OpenAI
7
+
8
+
9
def _load_gutenberg(paths):
    """Load every document found at the given Project Gutenberg site paths.

    Each entry in ``paths`` is appended to ``https://www.gutenberg.org/`` and
    handed to ``GutenbergLoader``; the documents from all loaders are returned
    as one flat list, in the order the paths were given.
    """
    loaded = []
    for path in paths:
        loaded.extend(GutenbergLoader(f'https://www.gutenberg.org/{path}').load())
    return loaded


# Source documents, fetched once when the module is imported.
docs = _load_gutenberg([
    'cache/epub/35688/pg35688.txt',
    'files/23718/23718-readme.txt',
])

# Split on newlines into chunks of at most ~1000 characters, without overlap.
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=0,
)
texts = text_splitter.split_documents(docs)

# Lazily initialised by make_inference() on its first call.
chain = None
embeddings = None
docsearch = None
20
+
21
+
22
def make_inference(openai_api_key, query):
    """Answer ``query`` from the indexed documents using OpenAI models.

    On first use this builds the QA chain, the embedding client and a Chroma
    retriever over the module-level ``texts``, and stores them in the module
    globals ``chain``, ``embeddings`` and ``docsearch``. Later calls reuse
    those objects, so the key passed on later calls is not used.

    Args:
        openai_api_key: OpenAI API key; surrounding whitespace is ignored.
        query: The question to answer.

    Returns:
        The value returned by ``chain.run`` for the retrieved documents and
        the query.
    """
    global chain, embeddings, docsearch

    if chain is None or embeddings is None or docsearch is None:
        # Strip once and use the same cleaned key for both clients; a key
        # pasted with a trailing newline would otherwise work for the LLM
        # but fail for the embeddings.
        api_key = openai_api_key.strip()

        new_chain = load_qa_chain(
            OpenAI(temperature=0, openai_api_key=api_key),
            chain_type="stuff",
        )
        new_embeddings = OpenAIEmbeddings(openai_api_key=api_key)
        # No `metadatas=` here: from_documents takes each chunk's metadata
        # from the Document itself (see the Chroma API reference).
        new_docsearch = Chroma.from_documents(texts, new_embeddings).as_retriever()

        # Publish the globals only after every step succeeded, so a failure
        # above (e.g. a rejected key while embedding) leaves the module
        # uninitialised and the next call retries instead of hitting None.
        chain, embeddings, docsearch = new_chain, new_embeddings, new_docsearch

    relevant_docs = docsearch.get_relevant_documents(query)
    return chain.run(input_documents=relevant_docs, question=query)
32
+
33
if __name__ == "__main__":
    # Build and launch the Gradio UI. gradio is imported here so that merely
    # importing this module does not require it.
    import gradio as gr

    gr.Interface(
        make_inference,
        [
            # type="password" masks the key so it is not shown in clear text.
            gr.Textbox(lines=1, label="OpenAI API Key", type="password"),
            gr.Textbox(lines=2, label="Query"),
        ],
        # gr.Textbox replaces the deprecated gr.inputs / gr.outputs namespaces.
        gr.Textbox(label="Response"),
        title="Multiple Document QA with LangChain",
        description="Ask me anything about Lewis Carroll's Alice in Wonderland and Through the Looking-Glass",
    ).launch()
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ langchain
2
+ openai
3
+ tiktoken
4
+ chromadb