Spaces:

iShare
/

Langchain-Summarization-Chain

Sleeping

iShare committed on Dec 7, 2023

Commit

9a8c838

1 Parent(s): d6ec152

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -29,6 +29,13 @@ llm = HuggingFaceHub(repo_id=repo_id,  # for StarChat
 #chain = load_summarize_chain(llm, chain_type="stuff")    #stuff模式容易导致出错：估计是超LLM的token限制所致
 chain = load_summarize_chain(llm, chain_type="refine")
 #llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
 url=st.text_input("Enter webiste URL to summarize (format: https://www.usinoip.com):")
@@ -42,8 +49,11 @@ if url !="" and not url.strip().isspace() and not url == "" and not url.strip()
             print("2")
             docs = loader.load()
             print(docs)
             print("3")
-            result=chain.run(docs)   #这个result的格式比较特殊，可以直接print，但不可以和其他字符串联合print输出 - this step errors!
             print("4")
             print(url)
             ai_response=str(result)

 #chain = load_summarize_chain(llm, chain_type="stuff")    #stuff模式容易导致出错：估计是超LLM的token限制所致
 chain = load_summarize_chain(llm, chain_type="refine")
+text_splitter_rcs = RecursiveCharacterTextSplitter(
+    #separator = "\n", #TypeError: TextSplitter.__init__() got an unexpected keyword argument 'separator'
+    chunk_size = 500,
+    chunk_overlap  = 100, #striding over the text
+    length_function = len,
+    )
 #llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
 url=st.text_input("Enter webiste URL to summarize (format: https://www.usinoip.com):")
             print("2")
             docs = loader.load()
             print(docs)
+            split_docs = text_splitter_rcs.split_documents(docs)
             print("3")
+            print("split_docs")
+            #result=chain.run(docs)   #这个result的格式比较特殊，可以直接print，但不可以和其他字符串联合print输出 - this step errors!
+            result=chain.run(split_docs)
             print("4")
             print(url)
             ai_response=str(result)