binqiangliu committed
Commit 6831b20
1 Parent(s): 09775a5

Update app.py

Files changed (1)
  1. app.py +56 -32
app.py CHANGED
@@ -82,14 +82,43 @@ def remove_context(text):
  print(f"处理多余的Context文本函数定义结束")

  url="https://www.usinoip.com"
- texts=""
- raw_text=""
- user_question = ""
- initial_embeddings=""
- db_embeddings = ""
- i_file_path=""
- file_path = ""
- random_string=""
+ #url="https://www.usinoip.com/UpdatesAbroad/290.html"
+
+ if "url_loader" not in st.session_state:
+     st.session_state.url_loader = ""
+
+ if "raw_text" not in st.session_state:
+     st.session_state.raw_text = ""
+
+ if "initial_page_content" not in st.session_state:
+     st.session_state.initial_page_content = ""
+
+ if "final_page_content" not in st.session_state:
+     st.session_state.final_page_content = ""
+
+ if "texts" not in st.session_state:
+     st.session_state.texts = ""
+
+ #if "user_question" not in st.session_state:
+ #    st.session_state.user_question = ""
+
+ if "initial_embeddings" not in st.session_state:
+     st.session_state.initial_embeddings = ""
+
+ if "db_embeddings" not in st.session_state:
+     st.session_state.db_embeddings = ""
+
+ #if "i_file_path" not in st.session_state:
+ #    st.session_state.i_file_path = ""
+ i_file_path = ""
+
+ #if "file_path" not in st.session_state:
+ #    st.session_state.file_path = ""
+
+ #if "random_string" not in st.session_state:
+ #    st.session_state.random_string = ""
+ random_string = ""
+
  wechat_image= "WeChatCode.jpg"

  st.sidebar.markdown(
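
Note: the block added above swaps the old module-level placeholders for guarded st.session_state keys, so the loaded page text and its embeddings survive Streamlit's script reruns instead of being reset on every widget interaction. A minimal sketch of that guard pattern, using illustrative key names rather than the app's own:

import streamlit as st

# Hypothetical defaults, for illustration only; the app above stores the
# loader output, split chunks, and embeddings under similar keys.
defaults = {
    "raw_text": "",
    "texts": "",
    "db_embeddings": "",
}

for key, value in defaults.items():
    # Initialize each key exactly once; later reruns keep the stored value.
    if key not in st.session_state:
        st.session_state[key] = value
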
@@ -120,17 +149,10 @@ st.markdown(

  user_question = st.text_input("Enter your query here and AI-Chat with your website:")

- #text_splitter = CharacterTextSplitter(
- # separator = "\n",
- # chunk_size = 1000,
- # chunk_overlap = 200,
- # length_function = len,
- #)
-
- text_splitter = RecursiveCharacterTextSplitter(
-     #separator = "\n",
+ text_splitter = CharacterTextSplitter(
+     separator = "\n",
      chunk_size = 1000,
-     chunk_overlap = 200, #striding over the text
+     chunk_overlap = 200,
      length_function = len,
  )
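
Note: this hunk switches from RecursiveCharacterTextSplitter back to LangChain's CharacterTextSplitter with an explicit "\n" separator. CharacterTextSplitter only cuts on that one separator, so a newline-free passage longer than chunk_size is emitted whole and logged as a warning (the "Created a chunk of size 3431..." comment in the next hunk), while the recursive splitter would have fallen back to finer separators. A small standalone sketch of the configuration above, assuming LangChain is installed and using made-up sample text:

from langchain.text_splitter import CharacterTextSplitter

text_splitter = CharacterTextSplitter(
    separator="\n",       # split only at newlines
    chunk_size=1000,      # target characters per chunk
    chunk_overlap=200,    # characters repeated between adjacent chunks
    length_function=len,
)

sample_text = "\n".join(f"Paragraph {i}: some website text." for i in range(200))
chunks = text_splitter.split_text(sample_text)
print(len(chunks), max(len(c) for c in chunks))  # chunk count and largest chunk length
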
 
@@ -143,29 +165,31 @@ with st.sidebar:
  st.subheader("Enjoy Chatting!")
  st.sidebar.markdown('<span class="blue-underline">Life Enhancing with AI.</span>', unsafe_allow_html=True)
  try:
-     with st.spinner("Preparing website materials for you..."):
-         url_loader = WebBaseLoader([url])
-         raw_text = url_loader.load()
-         page_content = raw_text[0].page_content
-         page_content = str(page_content)
-         temp_texts = text_splitter.split_text(page_content)
-         texts = temp_texts
-         initial_embeddings=get_embeddings(texts)
-         db_embeddings = torch.FloatTensor(initial_embeddings)
+     with st.spinner("Preparing website materials for you..."):
+         st.session_state.url_loader = WebBaseLoader([url])
+         st.session_state.raw_text = st.session_state.url_loader.load()
+         st.session_state.initial_page_content = st.session_state.raw_text[0].page_content
+         st.session_state.final_page_content = str(st.session_state.initial_page_content)
+         st.session_state.temp_texts = text_splitter.split_text(st.session_state.final_page_content)
+         #Created a chunk of size 3431, which is longer than the specified 1000
+         st.session_state.texts = st.session_state.temp_texts
+         st.session_state.initial_embeddings=get_embeddings(st.session_state.texts)
+         st.session_state.db_embeddings = torch.FloatTensor(st.session_state.initial_embeddings)
  except Exception as e:
-     st.write("Unknow error.")
-     print("Please enter a valide URL.")
-     st.stop()
+     # st.write("Unknow error.")
+     # print("Please enter a valide URL.")
+     # st.stop()
+     pass

  if st.button('Get AI Response'):
      if user_question !="" and not user_question.strip().isspace() and not user_question == "" and not user_question.strip() == "" and not user_question.isspace():
          with st.spinner("AI Thinking...Please wait a while to Cheers!"):
              q_embedding=get_embeddings(user_question)
              final_q_embedding = torch.FloatTensor(q_embedding)
-             hits = semantic_search(final_q_embedding, db_embeddings, top_k=5)
+             hits = semantic_search(final_q_embedding, st.session_state.db_embeddings, top_k=5)
              page_contents = []
              for i in range(len(hits[0])):
-                 page_content = texts[hits[0][i]['corpus_id']]
+                 page_content = st.session_state.texts[hits[0][i]['corpus_id']]
                  page_contents.append(page_content)
              temp_page_contents=str(page_contents)
              final_page_contents = temp_page_contents.replace('\\n', '')
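
Note: with everything in st.session_state, the try block above stores the fetched page chunks and their embeddings, and the button handler embeds the question and maps each semantic_search hit back to its chunk via corpus_id. An illustrative sketch of that retrieval step; get_embeddings is the app's own helper, so a SentenceTransformer model is assumed here purely as a stand-in:

import torch
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer("all-MiniLM-L6-v2")  # assumed model, illustration only
texts = ["Chunk about trademark filings.", "Chunk about patent applications."]

# Embed the corpus once, then embed the incoming question.
db_embeddings = torch.FloatTensor(model.encode(texts))
q_embedding = torch.FloatTensor(model.encode(["How do I file a patent?"]))

# Rank corpus chunks by cosine similarity and look each hit up by corpus_id.
hits = util.semantic_search(q_embedding, db_embeddings, top_k=5)
for hit in hits[0]:  # hits[0] holds the ranked results for the first (only) query
    print(round(hit["score"], 3), texts[hit["corpus_id"]])
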
 