refactor: Add comments explaining the code and adjust linting
app.py
CHANGED
@@ -1,38 +1,45 @@
+import gradio as gr
+import openai
+import os
+
 from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import PyPDFLoader
 from langchain.embeddings.openai import OpenAIEmbeddings
-from langchain.embeddings.cohere import CohereEmbeddings
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
 from langchain.vectorstores import Chroma
-from PyPDF2 import PdfWriter
-import gradio as gr
-import os
 from dotenv import load_dotenv
-
+
+from langchain.embeddings.cohere import CohereEmbeddings
+from langchain.vectorstores.elastic_vector_search import ElasticVectorSearch
+from PyPDF2 import PdfWriter
 
 load_dotenv()
-
-
-# api_key = os.environ['my_secret']  ## doesn't get loaded
-# api_key = os.getenv('my_secret')  ## #3: works by loading the secret key instead of .env
-os.environ["OPENAI_API_KEY"] = os.environ['my_secret']
+os.environ["OPENAI_API_KEY"] = os.getenv('my_secret')
+openai.api_key = os.getenv('my_secret')
 
-
+## Load PDF file
+loader = PyPDFLoader("docs.pdf")
 documents = loader.load()
 
+## Split Document
 text_splitter = CharacterTextSplitter(chunk_size=800, chunk_overlap=0)
 texts = text_splitter.split_documents(documents)
 
-
+## token -> Vector Embedding
 embeddings = OpenAIEmbeddings()
 vector_store = Chroma.from_documents(texts, embeddings)
 retriever = vector_store.as_retriever(search_kwargs={"k": 2})
 
 from langchain.chat_models import ChatOpenAI
 from langchain.chains import RetrievalQAWithSourcesChain
+from langchain.prompts.chat import (
+    ChatPromptTemplate,
+    SystemMessagePromptTemplate,
+    HumanMessagePromptTemplate,
+)
 
-
+## Build LLM Chain
+llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)  # Modify model_name if you have access to GPT-4
 
 chain = RetrievalQAWithSourcesChain.from_chain_type(
     llm=llm,
@@ -40,12 +47,6 @@ chain = RetrievalQAWithSourcesChain.from_chain_type(
     retriever = retriever,
     return_source_documents=True)
 
-from langchain.prompts.chat import (
-    ChatPromptTemplate,
-    SystemMessagePromptTemplate,
-    HumanMessagePromptTemplate,
-)
-
 system_template="""Use the following pieces of context to answer the users question shortly.
 Given the following summaries of a long document and a question, create a final answer with references ("SOURCES"), use "SOURCES" in capital letters regardless of the number of sources.
 If you don't know the answer, just say that "I don't know", don't try to make up an answer.
@@ -61,12 +62,11 @@ messages = [
 
 prompt = ChatPromptTemplate.from_messages(messages)
 
-
-
-
+############################
+## Check that this works locally
 chain_type_kwargs = {"prompt": prompt}
 
-llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)
+llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0)  # Modify model_name if you have access to GPT-4
 
 chain = RetrievalQAWithSourcesChain.from_chain_type(
     llm=llm,
@@ -84,21 +84,21 @@ for doc in result['source_documents']:
     print('Content : ' + doc.page_content[0:100].replace('\n', ' '))
     print('File : ' + doc.metadata['source'])
    print('Page : ' + str(doc.metadata['page']))
+##############################
 
-
+## Define response method
 def respond(message, chat_history):  # Define a function that handles the chatbot's response.
 
     result = chain(message)
-
     bot_message = result['answer']
 
     for i, doc in enumerate(result['source_documents']):
         bot_message += '[' + str(i+1) + '] ' + doc.metadata['source'] + '(' + str(doc.metadata['page']) + ') '
-
     chat_history.append((message, bot_message))  # Append the user's message and the bot's response to the chat history.
 
     return "", chat_history  # Return the updated chat history.
 
+## Build Gradio App
 with gr.Blocks(theme='gstaff/sketch') as demo:  # Create the interface with gr.Blocks().
     gr.Markdown("# Hello! Try talking to the chatbot.")
     chatbot = gr.Chatbot(label="Chat Window")  # Create a chatbot component labeled 'Chat Window'.
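
A note on the key handling changed above: os.environ['my_secret'] raises KeyError when the variable is missing, while os.getenv('my_secret') returns None, which is consistent with the removed "doesn't get loaded" comment when the Space secret is not defined. A minimal defensive sketch of the same pattern (the explicit check and error message are an assumption, not part of this commit):

    import os

    api_key = os.getenv('my_secret')  # returns None instead of raising KeyError when unset
    if api_key is None:
        # Assumed failure message; set the secret in the Space settings or a local .env file
        raise RuntimeError("Secret 'my_secret' is not set")
    os.environ["OPENAI_API_KEY"] = api_key  # picked up by ChatOpenAI / OpenAIEmbeddings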
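
The diff ends inside the gr.Blocks context, so the rest of the interface is not shown. For orientation only, a self-contained sketch of how such a Blocks app is typically wired up (the Textbox, the echo stand-in for respond, and demo.launch() are assumptions, not code from this commit):

    import gradio as gr

    def respond(message, chat_history):
        # Stand-in for the respond() defined in app.py above
        chat_history.append((message, "echo: " + message))
        return "", chat_history

    with gr.Blocks(theme='gstaff/sketch') as demo:
        gr.Markdown("# Hello! Try talking to the chatbot.")
        chatbot = gr.Chatbot(label="Chat Window")
        msg = gr.Textbox(label="Message")  # assumed input component name
        msg.submit(respond, [msg, chatbot], [msg, chatbot])  # Enter sends the message

    demo.launch()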