timfe committed
Commit f35434e · 1 Parent(s): 7efad42

changed default split values

Files changed (1)
  1. app.py +33 -10
app.py CHANGED
@@ -9,6 +9,7 @@ from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from langchain_text_splitters import RecursiveCharacterTextSplitter
 from langchain.callbacks import get_openai_callback
 from langchain_core.prompts import ChatPromptTemplate
+from langchain_core.runnables import RunnableParallel

 from langchain_community.document_loaders import UnstructuredFileLoader

@@ -26,11 +27,11 @@ for file in files:
 # Config
 with st.sidebar:
     st.write(f"Injected documents: \n\n {'\n'.join('\n'+file for file in files)}")
-    model = st.selectbox("Model name", ["gpt-3.5-turbo"])
+    model = st.selectbox("Model name", ["gpt-3.5-turbo"], disabled=True)
     temperature = st.number_input("Temperature", value=0.0, min_value=0.0, step=0.2, max_value=1.0, placeholder=0.0)
-    if st.toggle("Splitting"):
-        chunk_size = st.number_input("Chunk size", value=500, step=250, placeholder=500)  # Size (in characters) of the chunks into which the files are split; also determines how much text is fed into the context.
-        chunk_overlap = st.number_input("Chunk overlap", value=100, step=10, placeholder=100)
+    if st.toggle("Splitting", value=True):
+        chunk_size = st.number_input("Chunk size", value=750, step=250, placeholder=750)  # Size (in characters) of the chunks into which the files are split; also determines how much text is fed into the context.
+        chunk_overlap = st.number_input("Chunk overlap", value=0, step=10, placeholder=0)
         text_splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=chunk_overlap)
         splits = text_splitter.split_documents(docs)
         vectorstore = Chroma.from_documents(documents=splits, embedding=OpenAIEmbeddings())
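
Note: the new defaults mean larger chunks with no overlap. A minimal sketch of what that does in practice; the 2000-character document is a made-up example, and RecursiveCharacterTextSplitter counts characters (not tokens) by default:

# Sketch of the new split defaults; the input document is hypothetical.
from langchain_core.documents import Document
from langchain_text_splitters import RecursiveCharacterTextSplitter

docs = [Document(page_content="x" * 2000)]  # one 2000-character document
splitter = RecursiveCharacterTextSplitter(chunk_size=750, chunk_overlap=0)
splits = splitter.split_documents(docs)
print([len(s.page_content) for s in splits])  # expected: [750, 750, 500]

With the previous 500/100 settings the same text would split into more, overlapping chunks; 750/0 trades overlap at chunk boundaries for fewer, larger chunks of context.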
@@ -67,13 +68,26 @@ def format_docs(docs):
     return "\n\n".join(doc.page_content for doc in docs)


-rag_chain = (
-    {"context": retriever | format_docs, "question": RunnablePassthrough()}
+# rag_chain = (
+#     {"context": retriever | format_docs, "question": RunnablePassthrough()}
+#     | prompt
+#     | llm
+#     | StrOutputParser()
+# )
+
+
+rag_chain_from_docs = (
+    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
     | prompt
     | llm
     | StrOutputParser()
 )

+rag_chain = RunnableParallel(
+    {"context": retriever, "question": RunnablePassthrough()}
+).assign(answer=rag_chain_from_docs)
+

 st.title("🔍 Volker-Chat")

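Note: restructuring the chain changes what rag_chain.invoke() returns. RunnableParallel fans the input out to the retriever and a passthrough, and .assign() adds the LLM answer, so the result is a dict rather than a bare string. A sketch of the shape (the question string is illustrative):

# Illustrative call; the keys follow from the RunnableParallel/.assign structure above.
response = rag_chain.invoke("Wer ist Volker?")
response["question"]  # the prompt, passed through unchanged
response["context"]   # list of retrieved Documents, rendered in the new expander
response["answer"]    # the model's reply as a string, via StrOutputParser

This is why the chat handlers below write response['answer'] instead of response.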
@@ -81,6 +95,7 @@ def click_button(prompt):
     st.session_state.clicked = True
     st.session_state['prompt'] = prompt

+
 c = st.container()
 c.write("Beispielfragen")
 col1, col2, col3 = c.columns(3)
@@ -104,19 +119,27 @@ if st.session_state.clicked:
     st.chat_message("user").write(prompt)
     with get_openai_callback() as cb:
         response = rag_chain.invoke(prompt)
-    st.chat_message("assistant").write(response)
+    st.chat_message("assistant").write(response['answer'])
+    with st.expander("Kontext ansehen"):
+        st.write(response["context"])
     with st.sidebar:
         sidebar_c = st.container()
         sidebar_c.success(cb)
-    st.session_state.clicked = False
+

 if prompt := st.chat_input():
     st.chat_message("user").write(prompt)
     with get_openai_callback() as cb:
         response = rag_chain.invoke(prompt)
-    st.chat_message("assistant").write(response)
+    st.chat_message("assistant").write(response['answer'])
+    with st.expander("Kontext ansehen"):
+        st.write(response["context"])
     with st.sidebar:
         sidebar_c = st.container()
         sidebar_c.success(cb)
-    st.session_state.clicked = False
+
+
+# cleanup
+st.session_state.clicked = False
+vectorstore.delete_collection()
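Note: the cleanup block at the end is new. Streamlit re-executes app.py on every interaction, so dropping the Chroma collection at the end of a run means the next rerun rebuilds it from the current sidebar settings. A sketch of the intent, assuming the vectorstore built in the sidebar above:

# End-of-run cleanup (assumes `vectorstore` from the sidebar block above).
st.session_state.clicked = False  # re-arm the example-question buttons
vectorstore.delete_collection()   # drop the Chroma collection; it is rebuilt on the next rerun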