elshehawy committed on
Commit
776880e
·
1 Parent(s): 2ffcc60
Files changed (3) hide show
  1. app.py +54 -0
  2. data/pdf/0.pdf +0 -0
  3. requirements.txt +7 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import gradio as gr
3
+ import openai
4
+ from langchain import hub
5
+ from langchain_community.document_loaders import PyPDFLoader
6
+ from langchain_community.vectorstores import Chroma
7
+ from langchain_core.output_parsers import StrOutputParser
8
+ from langchain_core.runnables import RunnablePassthrough
9
+ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
10
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
11
+
12
+
13
# Directory scanned for the PDF files that make up the knowledge base.
data_root = './data/pdf/'

# Keep only PDF files: os.listdir() also returns hidden files and
# sub-directories, which must not be handed to the PDF loader. Sorting
# makes the ingestion order deterministic (listdir order is arbitrary).
pdf_paths = [
    os.path.join(data_root, name)
    for name in sorted(os.listdir(data_root))
    if name.lower().endswith('.pdf')
]

loaders = [PyPDFLoader(path) for path in pdf_paths]

# Gather everything each loader returns into one flat list. Every page is
# kept, the first one included; nothing is skipped here.
docs = []
for loader in loaders:
    docs.extend(loader.load())
23
+
24
# Splitter settings: target chunk length and the overlap shared by
# neighbouring chunks.
chunk_size = 500
chunk_overlap = 100

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=chunk_size,
    chunk_overlap=chunk_overlap,
)
splits = text_splitter.split_documents(docs)

# Embed the chunks with OpenAI embeddings and index them in Chroma; the
# retriever is the query-side handle on that index.
vectorstore = Chroma.from_documents(
    documents=splits,
    embedding=OpenAIEmbeddings(),
)
retriever = vectorstore.as_retriever()

# Prompt template pulled from the LangChain Hub.
prompt = hub.pull("rlm/rag-prompt")

# Chat model used to answer; alternative model: 'gpt-3.5-turbo-0125'.
model_name = 'gpt-4-1106-preview'
llm = ChatOpenAI(model_name=model_name, temperature=0)
39
def format_docs(docs):
    """Join the text of the given documents into a single string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values, in input order, separated by a blank
        line. An empty input yields an empty string.
    """
    page_texts = [document.page_content for document in docs]
    separator = '\n\n'
    return separator.join(page_texts)
41
+
42
# Inputs for the prompt: "context" is the retriever output flattened by
# format_docs, "question" is the incoming query passed through unchanged.
_rag_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# Full pipeline: fill the prompt, call the LLM, parse the reply to a string.
rag_chain = _rag_inputs | prompt | llm | StrOutputParser()
48
+
49
def predict(query):
    """Answer a user question by running it through ``rag_chain``.

    Args:
        query: The question text, passed straight to the chain.

    Returns:
        Whatever ``rag_chain.invoke(query)`` produces.
    """
    answer = rag_chain.invoke(query)
    return answer
51
+
52
# Gradio front end: a four-line question box (Arabic label) wired to
# predict(), with the answer shown as plain text.
textbox = gr.Textbox(
    label="اكتب سؤالك هنا",
    placeholder="",
    lines=4,
)
iface = gr.Interface(
    fn=predict,
    inputs=textbox,
    outputs="text",
)
iface.launch()
data/pdf/0.pdf ADDED
Binary file (468 kB). View file
 
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ langchain
2
+ langchain-community
3
+ langchainhub
4
+ langchain-openai
5
+ chromadb
6
+ bs4
7
+ pypdf