Spaces:
Sleeping
Sleeping
Commit
·
6831b20
1
Parent(s):
09775a5
Update app.py
Browse files
app.py
CHANGED
@@ -82,14 +82,43 @@ def remove_context(text):
|
|
82 |
print(f"处理多余的Context文本函数定义结束")
|
83 |
|
84 |
url="https://www.usinoip.com"
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
wechat_image= "WeChatCode.jpg"
|
94 |
|
95 |
st.sidebar.markdown(
|
@@ -120,17 +149,10 @@ st.markdown(
|
|
120 |
|
121 |
user_question = st.text_input("Enter your query here and AI-Chat with your website:")
|
122 |
|
123 |
-
|
124 |
-
|
125 |
-
# chunk_size = 1000,
|
126 |
-
# chunk_overlap = 200,
|
127 |
-
# length_function = len,
|
128 |
-
#)
|
129 |
-
|
130 |
-
text_splitter = RecursiveCharacterTextSplitter(
|
131 |
-
#separator = "\n",
|
132 |
chunk_size = 1000,
|
133 |
-
chunk_overlap = 200,
|
134 |
length_function = len,
|
135 |
)
|
136 |
|
@@ -143,29 +165,31 @@ with st.sidebar:
|
|
143 |
st.subheader("Enjoy Chatting!")
|
144 |
st.sidebar.markdown('<span class="blue-underline">Life Enhancing with AI.</span>', unsafe_allow_html=True)
|
145 |
try:
|
146 |
-
with st.spinner("Preparing website materials for you..."):
|
147 |
-
url_loader = WebBaseLoader([url])
|
148 |
-
raw_text = url_loader.load()
|
149 |
-
|
150 |
-
|
151 |
-
temp_texts = text_splitter.split_text(
|
152 |
-
|
153 |
-
|
154 |
-
|
|
|
155 |
except Exception as e:
|
156 |
-
|
157 |
-
|
158 |
-
|
|
|
159 |
|
160 |
if st.button('Get AI Response'):
|
161 |
if user_question !="" and not user_question.strip().isspace() and not user_question == "" and not user_question.strip() == "" and not user_question.isspace():
|
162 |
with st.spinner("AI Thinking...Please wait a while to Cheers!"):
|
163 |
q_embedding=get_embeddings(user_question)
|
164 |
final_q_embedding = torch.FloatTensor(q_embedding)
|
165 |
-
hits = semantic_search(final_q_embedding, db_embeddings, top_k=5)
|
166 |
page_contents = []
|
167 |
for i in range(len(hits[0])):
|
168 |
-
page_content = texts[hits[0][i]['corpus_id']]
|
169 |
page_contents.append(page_content)
|
170 |
temp_page_contents=str(page_contents)
|
171 |
final_page_contents = temp_page_contents.replace('\\n', '')
|
|
|
82 |
print(f"处理多余的Context文本函数定义结束")
|
83 |
|
84 |
url="https://www.usinoip.com"
|
85 |
+
#url="https://www.usinoip.com/UpdatesAbroad/290.html"
|
86 |
+
|
87 |
+
if "url_loader" not in st.session_state:
|
88 |
+
st.session_state.url_loader = ""
|
89 |
+
|
90 |
+
if "raw_text" not in st.session_state:
|
91 |
+
st.session_state.raw_text = ""
|
92 |
+
|
93 |
+
if "initial_page_content" not in st.session_state:
|
94 |
+
st.session_state.initial_page_content = ""
|
95 |
+
|
96 |
+
if "final_page_content" not in st.session_state:
|
97 |
+
st.session_state.final_page_content = ""
|
98 |
+
|
99 |
+
if "texts" not in st.session_state:
|
100 |
+
st.session_state.texts = ""
|
101 |
+
|
102 |
+
#if "user_question" not in st.session_state:
|
103 |
+
# st.session_state.user_question = ""
|
104 |
+
|
105 |
+
if "initial_embeddings" not in st.session_state:
|
106 |
+
st.session_state.initial_embeddings = ""
|
107 |
+
|
108 |
+
if "db_embeddings" not in st.session_state:
|
109 |
+
st.session_state.db_embeddings = ""
|
110 |
+
|
111 |
+
#if "i_file_path" not in st.session_state:
|
112 |
+
# st.session_state.i_file_path = ""
|
113 |
+
i_file_path = ""
|
114 |
+
|
115 |
+
#if "file_path" not in st.session_state:
|
116 |
+
# st.session_state.file_path = ""
|
117 |
+
|
118 |
+
#if "random_string" not in st.session_state:
|
119 |
+
# st.session_state.random_string = ""
|
120 |
+
random_string = ""
|
121 |
+
|
122 |
wechat_image= "WeChatCode.jpg"
|
123 |
|
124 |
st.sidebar.markdown(
|
|
|
149 |
|
150 |
user_question = st.text_input("Enter your query here and AI-Chat with your website:")
|
151 |
|
152 |
+
text_splitter = CharacterTextSplitter(
|
153 |
+
separator = "\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
154 |
chunk_size = 1000,
|
155 |
+
chunk_overlap = 200,
|
156 |
length_function = len,
|
157 |
)
|
158 |
|
|
|
165 |
st.subheader("Enjoy Chatting!")
|
166 |
st.sidebar.markdown('<span class="blue-underline">Life Enhancing with AI.</span>', unsafe_allow_html=True)
|
167 |
try:
|
168 |
+
with st.spinner("Preparing website materials for you..."):
|
169 |
+
st.session_state.url_loader = WebBaseLoader([url])
|
170 |
+
st.session_state.raw_text = st.session_state.url_loader.load()
|
171 |
+
st.session_state.initial_page_content = st.session_state.raw_text[0].page_content
|
172 |
+
st.session_state.final_page_content = str(st.session_state.initial_page_content)
|
173 |
+
st.session_state.temp_texts = text_splitter.split_text(st.session_state.final_page_content)
|
174 |
+
#Created a chunk of size 3431, which is longer than the specified 1000
|
175 |
+
st.session_state.texts = st.session_state.temp_texts
|
176 |
+
st.session_state.initial_embeddings=get_embeddings(st.session_state.texts)
|
177 |
+
st.session_state.db_embeddings = torch.FloatTensor(st.session_state.initial_embeddings)
|
178 |
except Exception as e:
|
179 |
+
# st.write("Unknow error.")
|
180 |
+
# print("Please enter a valide URL.")
|
181 |
+
# st.stop()
|
182 |
+
pass
|
183 |
|
184 |
if st.button('Get AI Response'):
|
185 |
if user_question !="" and not user_question.strip().isspace() and not user_question == "" and not user_question.strip() == "" and not user_question.isspace():
|
186 |
with st.spinner("AI Thinking...Please wait a while to Cheers!"):
|
187 |
q_embedding=get_embeddings(user_question)
|
188 |
final_q_embedding = torch.FloatTensor(q_embedding)
|
189 |
+
hits = semantic_search(final_q_embedding, st.session_state.db_embeddings, top_k=5)
|
190 |
page_contents = []
|
191 |
for i in range(len(hits[0])):
|
192 |
+
page_content = st.session_state.texts[hits[0][i]['corpus_id']]
|
193 |
page_contents.append(page_content)
|
194 |
temp_page_contents=str(page_contents)
|
195 |
final_page_contents = temp_page_contents.replace('\\n', '')
|