changed default split values

app.py CHANGED
@@ -9,6 +9,7 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.callbacks import get_openai_callback
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnableParallel
 
 from langchain_community.document_loaders import UnstructuredFileLoader
 
@@ -26,11 +27,11 @@ for file in files:
 # Config
 with st.sidebar:
     st.write(f"Injected documents: \n\n {'\n'.join('\n'+file for file in files)}")
-    model = st.selectbox("Model name", ["gpt-3.5-turbo"])
+    model = st.selectbox("Model name", ["gpt-3.5-turbo"], disabled=True)
     temperature = st.number_input("Temperature", value=0.0, min_value=0.0, step=0.2, max_value=1.0, placeholder=0.0)
-    if st.toggle("Splitting"):
-        chunk_size = st.number_input("Chunk size", value=
-        chunk_overlap = st.number_input("Chunk overlap", value=
+    if st.toggle("Splitting", value=True):
+        chunk_size = st.number_input("Chunk size", value=750, step=250, placeholder=750)  # Size (in characters) of the chunks into which the files are split; this also bounds how much text is fed into the context.
+        chunk_overlap = st.number_input("Chunk overlap", value=0, step=10, placeholder=0)
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
         splits = text_splitter.split_documents(docs)
         vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
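Note: RecursiveCharacterTextSplitter measures chunk_size and chunk_overlap in characters by default (its length function is len), so the new defaults yield chunks of at most 750 characters with no overlap. A minimal sketch of the effect, not part of the commit (the filler string is made up):

from langchain_text_splitters import RecursiveCharacterTextSplitter

# Mirror the new sidebar defaults: 750-character chunks, no overlap.
splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=0)
chunks = splitter.split_text("lorem ipsum " * 200)  # ~2400 characters of filler
print(len(chunks))                  # a few chunks
print(max(len(c) for c in chunks))  # each chunk stays at or under 750 characters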
@@ -67,13 +68,26 @@ def format_docs(docs):
     return "\n\n".join(doc.page_content for doc in docs)
 
 
-rag_chain = (
-    {"context": retriever | format_docs, "question": RunnablePassthrough()}
+# rag_chain = (
+#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
+#     | prompt
+#     | llm
+#     | StrOutputParser()
+# )
+
+
+
+rag_chain_from_docs = (
+    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
     | prompt
     | llm
     | StrOutputParser()
 )
 
+rag_chain = RunnableParallel(
+    {"context": retriever, "question": RunnablePassthrough()}
+).assign(answer=rag_chain_from_docs)
+
 
 st.title("π Volker-Chat")
 
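Note: this rewrite changes what rag_chain.invoke() returns. The old pipeline produced a bare string; RunnableParallel(...).assign(answer=...) produces a dict holding the retrieved context, the question, and the answer, which the UI changes further down rely on. A minimal sketch of the output shape with stand-in runnables (fake_retriever and fake_answerer are hypothetical; the app uses retriever and rag_chain_from_docs):

from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough

# Stand-ins that only demonstrate the shape of the result.
fake_retriever = RunnableLambda(lambda q: ["doc about " + q])
fake_answerer = RunnableLambda(lambda x: "answer grounded in " + str(x["context"]))

chain = RunnableParallel(
    {"context": fake_retriever, "question": RunnablePassthrough()}
).assign(answer=fake_answerer)

print(chain.invoke("splitting"))
# {'context': ['doc about splitting'], 'question': 'splitting',
#  'answer': "answer grounded in ['doc about splitting']"}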
@@ -81,6 +95,7 @@ def click_button(prompt):
     st.session_state.clicked = True
     st.session_state['prompt'] = prompt
 
+
 c = st.container()
 c.write("Beispielfragen")
 col1, col2, col3 = c.columns(3)
@@ -104,19 +119,27 @@ if st.session_state.clicked:
     st.chat_message("user").write(prompt)
     with get_openai_callback() as cb:
         response = rag_chain.invoke(prompt)
-        st.chat_message("assistant").write(response)
+        st.chat_message("assistant").write(response['answer'])
+        with st.expander("Kontext ansehen"):
+            st.write(response["context"])
     with st.sidebar:
         sidebar_c = st.container()
         sidebar_c.success(cb)
-
+
 
 if prompt := st.chat_input():
     st.chat_message("user").write(prompt)
     with get_openai_callback() as cb:
         response = rag_chain.invoke(prompt)
-        st.chat_message("assistant").write(response)
+        st.chat_message("assistant").write(response['answer'])
+        with st.expander("Kontext ansehen"):
+            st.write(response["context"])
     with st.sidebar:
         sidebar_c = st.container()
         sidebar_c.success(cb)
-
+
+
+# cleanup
+st.session_state.clicked = False
+vectorstore.delete_collection()
 
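Note: both chat branches now surface the response dict introduced above (response['answer'] for the assistant message, response["context"] inside the expander), and the new cleanup lines reset the clicked flag and drop the Chroma collection so the next run re-indexes from scratch. The cost readout in the sidebar comes from get_openai_callback, which totals tokens and cost for every call made inside its with block. A minimal sketch, assuming OPENAI_API_KEY is set and the prompt text is arbitrary:

from langchain.callbacks import get_openai_callback
from langchain_openai import ChatOpenAI

llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)

with get_openai_callback() as cb:
    llm.invoke("Say hello")

print(cb.total_tokens, cb.total_cost)  # aggregated over all calls in the block
print(cb)                              # the summary string shown via sidebar_c.success(cb)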