Spaces:

iShare
/

Langchain-Summarization-Chain

Sleeping

App Files Files Community

iShare committed on Dec 7, 2023

Commit

2467b5a

1 Parent(s): 9860b3d

Update app.py

Browse files

Files changed (1) hide show

app.py +24 -3

app.py CHANGED Viewed

@@ -31,6 +31,19 @@ llm = HuggingFaceHub(repo_id=repo_id,  # for StarChat
 chain = load_summarize_chain(llm, chain_type="refine")
 #text_splitter_rcs = RecursiveCharacterTextSplitter(
 #    #separator = "\n", #TypeError: TextSplitter.__init__() got an unexpected keyword argument 'separator'
 #    chunk_size = 500,
@@ -46,18 +59,26 @@ if url !="" and not url.strip().isspace() and not url == "" and not url.strip()
     try:
         #loader = WebBaseLoader("https://www.usinoip.com/")
         with st.spinner("AI Thinking...Please wait a while to Cheers!"):
-            print("Website to Chat: "+url)
             loader = WebBaseLoader(url)
             docs = loader.load()
             print("Webpage contents loaded")
             #split_docs = text_splitter_rcs.split_documents(docs)
             #print(split_docs)
             result=chain.run(docs)   #这个result的格式比较特殊，可以直接print，但不可以和其他字符串联合print输出 - this step errors!
             #result=chain.run(split_docs)   #找到之前总是POST Error的原因：chain.run(docs)的结果，格式不是str，导致程序错误
-            print("Chain run finished")
             result=str(result)
             cleaned_initial_ai_response = remove_context(result)
-            print("Ai Resposne result cleaned initially: "+cleaned_initial_ai_response)
             final_ai_response = cleaned_initial_ai_response.split('<|end|>')[0].strip().replace('\n\n', '\n').replace('<|end|>', '').replace('<|user|>', '').replace('<|system|>', '').replace('<|assistant|>', '')
             new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
             final_result = new_final_ai_response.split('Note:')[0].strip()

 chain = load_summarize_chain(llm, chain_type="refine")
+print(f"定义处理多余的Context文本的函数")
def remove_context(text: str) -> str:
    """Strip a leading ``Context:`` section from a model response.

    Everything up to and including the first blank line (``'\\n\\n'``) that
    follows the ``'Context:'`` marker is dropped, and the remainder is
    returned.

    Args:
        text: Raw response text, possibly containing a ``Context:`` section.

    Returns:
        The text after the blank line that ends the ``Context:`` section.
        ``text`` is returned unchanged when the marker is absent, or when no
        blank line follows it (there is then no boundary to cut at).
    """
    marker_at = text.find('Context:')
    if marker_at == -1:
        # No 'Context:' section: nothing to strip.
        return text
    # Look for the blank line only *after* the marker, so that an earlier
    # blank line cannot be mistaken for the end of the context section.
    _, blank_line, remainder = text[marker_at:].partition('\n\n')
    if not blank_line:
        # No terminating blank line. Slicing on a failed find() (-1) would
        # silently drop the first character, so leave the text untouched.
        return text
    return remainder
+print(f"处理多余的Context文本函数定义结束")
 #text_splitter_rcs = RecursiveCharacterTextSplitter(
 #    #separator = "\n", #TypeError: TextSplitter.__init__() got an unexpected keyword argument 'separator'
 #    chunk_size = 500,
     try:
         #loader = WebBaseLoader("https://www.usinoip.com/")
         with st.spinner("AI Thinking...Please wait a while to Cheers!"):
+            print("Website to Chat: "+url)
             loader = WebBaseLoader(url)
             docs = loader.load()
             print("Webpage contents loaded")
             #split_docs = text_splitter_rcs.split_documents(docs)
             #print(split_docs)
             result=chain.run(docs)   #这个result的格式比较特殊，可以直接print，但不可以和其他字符串联合print输出 - this step errors!
             #result=chain.run(split_docs)   #找到之前总是POST Error的原因：chain.run(docs)的结果，格式不是str，导致程序错误
+            print("Chain run results:")
+            print(result)
             result=str(result)
+            print("Chain run results in str format:")
+            print(result)
             cleaned_initial_ai_response = remove_context(result)
+            print("Ai Resposne result cleaned initially: "+cleaned_initial_ai_response)
             final_ai_response = cleaned_initial_ai_response.split('<|end|>')[0].strip().replace('\n\n', '\n').replace('<|end|>', '').replace('<|user|>', '').replace('<|system|>', '').replace('<|assistant|>', '')
             new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
             final_result = new_final_ai_response.split('Note:')[0].strip()