Update app.py
app.py
CHANGED
@@ -15,19 +15,6 @@ load_dotenv()
 hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
 repo_id=os.environ.get('repo_id')
 
-print(f"Defining the function that strips the redundant Context text")
-def remove_context(text):
-    # Check whether 'Context:' is present
-    if 'Context:' in text:
-        # Find the position of the first '\n\n'
-        end_of_context = text.find('\n\n')
-        # Delete the part from 'Context:' up to the first '\n\n'
-        return text[end_of_context + 2:]  # '+2' skips the two newline characters
-    else:
-        # If 'Context:' is absent, return the original text
-        return text
-print(f"Finished defining the redundant-Context cleanup function")
-
 #port = os.getenv('port')
 
 #OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')
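The deleted remove_context() helper stripped a leading "Context:" block from a completion. For reference, a standalone sketch of its behavior (the sample string is invented for illustration); note that text.find('\n\n') searches from the start of the whole string, not from where 'Context:' occurs, so the helper silently assumes the context block comes first:

def remove_context(text):
    # Drop everything up to and including the first blank line
    # when a 'Context:' preamble is present.
    if 'Context:' in text:
        end_of_context = text.find('\n\n')
        return text[end_of_context + 2:]  # '+2' skips the two newlines
    return text

sample = "Context: raw page text scraped from the URL\n\nSummary: an IP law firm site."
print(remove_context(sample))  # -> "Summary: an IP law firm site."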
@@ -41,6 +28,7 @@ llm = HuggingFaceHub(repo_id=repo_id, # for StarChat
                     "top_p": 0.95, "eos_token_id": 49155})
 
 #chain = load_summarize_chain(llm, chain_type="stuff") #"stuff" mode tends to error out: probably because it exceeds the LLM's token limit
+
 chain = load_summarize_chain(llm, chain_type="refine")
 
 text_splitter_rcs = RecursiveCharacterTextSplitter(
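The comment above explains the switch: "stuff" concatenates every split document into a single prompt, which can blow past the model's context window on long pages, while "refine" summarizes the first chunk and then folds in each remaining chunk one call at a time. A minimal sketch of the setup, assuming the legacy langchain imports this Space already uses; the chunk sizes are illustrative, since the actual RecursiveCharacterTextSplitter(...) arguments are truncated in this view:

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.summarize import load_summarize_chain

# Illustrative sizes: keep each chunk well under the model's token limit.
text_splitter_rcs = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=100)

# "refine" runs one LLM call per chunk instead of one giant "stuff" prompt.
chain = load_summarize_chain(llm, chain_type="refine")  # llm is the HuggingFaceHub client above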
@@ -58,25 +46,28 @@ if url !="" and not url.strip().isspace() and not url == "" and not url.strip()
     try:
         #loader = WebBaseLoader("https://www.usinoip.com/")
         with st.spinner("AI Thinking...Please wait a while to Cheers!"):
-            print(
+            print("1")
             loader = WebBaseLoader(url)
+            print("2")
             docs = loader.load()
+            print(docs)
             split_docs = text_splitter_rcs.split_documents(docs)
+            print("3")
+            print(split_docs)
             #result=chain.run(docs) #result has an unusual type: it can be printed by itself, but printing it concatenated with other strings fails - this step errors!
             result=chain.run(split_docs)
-
-
-
-
-            final_ai_response = cleaned_initial_ai_response.split('<|end|>')[0].strip().replace('\n\n', '\n').replace('<|end|>', '').replace('<|user|>', '').replace('<|system|>', '').replace('<|assistant|>', '')
-            new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
-            final_result = new_final_ai_response.split('Note:')[0].strip()
+            print("4")
+            print(url)
+            ai_response=str(result)
+
             #print("AI Summarization: "+result) #this errors, see the reason above
             print("AI Summarization:")
             #print(result)
-            print(
+            print(ai_response)
+
             st.write("AI Summarization:")
             #st.write(result)
-            st.write(
+            st.write(ai_response)
+
     except Exception as e:
         st.write("Wrong URL or URL not parsable.")
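The deleted post-processing sliced StarChat chat-control tokens and trailing boilerplate out of the raw completion; it also referenced cleaned_initial_ai_response, which was produced by the remove_context() helper deleted above, so dropping one forced dropping the other. The new code sidesteps the issue by coercing the chain output with str(result) and printing that directly. The old cleanup, reconstructed as a standalone function for reference:

def clean_starchat_response(raw):
    # Keep only the text before the first '<|end|>' marker,
    # then strip any remaining StarChat chat-control tokens.
    cleaned = raw.split('<|end|>')[0].strip().replace('\n\n', '\n')
    for token in ('<|end|>', '<|user|>', '<|system|>', '<|assistant|>'):
        cleaned = cleaned.replace(token, '')
    # Drop trailing 'Unhelpful Answer:' and 'Note:' boilerplate if present.
    cleaned = cleaned.split('Unhelpful Answer:')[0].strip()
    return cleaned.split('Note:')[0].strip()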