Update app.py
Browse files
app.py
CHANGED
@@ -29,6 +29,13 @@ llm = HuggingFaceHub(repo_id=repo_id, # for StarChat
|
|
29 |
#chain = load_summarize_chain(llm, chain_type="stuff") # "stuff" mode is error-prone: probably because the input exceeds the LLM's token limit
|
30 |
chain = load_summarize_chain(llm, chain_type="refine")
|
31 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
#llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
|
33 |
|
34 |
url=st.text_input("Enter webiste URL to summarize (format: https://www.usinoip.com):")
|
@@ -42,8 +49,11 @@ if url !="" and not url.strip().isspace() and not url == "" and not url.strip()
|
|
42 |
print("2")
|
43 |
docs = loader.load()
|
44 |
print(docs)
|
|
|
45 |
print("3")
|
46 |
-
|
|
|
|
|
47 |
print("4")
|
48 |
print(url)
|
49 |
ai_response=str(result)
|
|
|
29 |
#chain = load_summarize_chain(llm, chain_type="stuff") # "stuff" mode is error-prone: probably because the input exceeds the LLM's token limit
|
30 |
chain = load_summarize_chain(llm, chain_type="refine")
|
31 |
|
32 |
+
text_splitter_rcs = RecursiveCharacterTextSplitter(
|
33 |
+
#separator = "\n", #TypeError: TextSplitter.__init__() got an unexpected keyword argument 'separator'
|
34 |
+
chunk_size = 500,
|
35 |
+
chunk_overlap = 100, #striding over the text
|
36 |
+
length_function = len,
|
37 |
+
)
|
38 |
+
|
39 |
#llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
|
40 |
|
41 |
url=st.text_input("Enter webiste URL to summarize (format: https://www.usinoip.com):")
|
|
|
49 |
print("2")
|
50 |
docs = loader.load()
|
51 |
print(docs)
|
52 |
+
split_docs = text_splitter_rcs.split_documents(docs)
|
53 |
print("3")
|
54 |
+
print("split_docs")
|
55 |
+
#result=chain.run(docs) # this result has a somewhat unusual format: it can be printed directly, but cannot be combined with other strings in a single print call - this step errors!
|
56 |
+
result=chain.run(split_docs)
|
57 |
print("4")
|
58 |
print(url)
|
59 |
ai_response=str(result)
|