Commit
·
a2de0a4
1
Parent(s):
731dcdf
simple stuffing
Browse files
app.py
CHANGED
@@ -36,10 +36,6 @@ def pdf_changes(pdf_doc):
|
|
36 |
loader = OnlinePDFLoader(pdf_doc.name)
|
37 |
documents = loader.load()
|
38 |
|
39 |
-
# Split loaded documents into chunks
|
40 |
-
text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
|
41 |
-
texts = text_splitter.split_documents(documents)
|
42 |
-
|
43 |
# Define the prompt for summarization
|
44 |
prompt_template = """Write a concise summary of the following:
|
45 |
"{text}"
|
@@ -55,23 +51,13 @@ def pdf_changes(pdf_doc):
|
|
55 |
llm_chain=llm_chain, document_variable_name="text"
|
56 |
)
|
57 |
|
58 |
-
#
|
59 |
-
full_summary = ""
|
60 |
-
|
61 |
-
# Iterate through text chunks to summarize
|
62 |
-
for i in range(0, len(texts), 2):
|
63 |
-
chunk = " ".join([doc.page_content for doc in texts[i:i + 2]])
|
64 |
-
|
65 |
-
# Generate summary using StuffDocumentsChain
|
66 |
-
chunk_summary = stuff_chain.run([chunk])
|
67 |
-
|
68 |
-
# Add chunk summary to full summary
|
69 |
-
full_summary += f"Summary of pages {i+1}-{i+3}:\n{chunk_summary}\n"
|
70 |
|
71 |
# Other existing logic for Chroma, embeddings, and retrieval
|
72 |
embeddings = OpenAIEmbeddings()
|
73 |
global db
|
74 |
-
db = Chroma.from_documents(texts, embeddings)
|
75 |
|
76 |
retriever = db.as_retriever()
|
77 |
global qa
|
@@ -88,7 +74,6 @@ def pdf_changes(pdf_doc):
|
|
88 |
|
89 |
|
90 |
|
91 |
-
|
92 |
def clear_data():
|
93 |
global qa, db
|
94 |
qa = None
|
|
|
36 |
loader = OnlinePDFLoader(pdf_doc.name)
|
37 |
documents = loader.load()
|
38 |
|
|
|
|
|
|
|
|
|
39 |
# Define the prompt for summarization
|
40 |
prompt_template = """Write a concise summary of the following:
|
41 |
"{text}"
|
|
|
51 |
llm_chain=llm_chain, document_variable_name="text"
|
52 |
)
|
53 |
|
54 |
+
# Generate summary using StuffDocumentsChain
|
55 |
+
full_summary = stuff_chain.run(documents)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
56 |
|
57 |
# Other existing logic for Chroma, embeddings, and retrieval
|
58 |
embeddings = OpenAIEmbeddings()
|
59 |
global db
|
60 |
+
db = Chroma.from_documents(documents, embeddings)
|
61 |
|
62 |
retriever = db.as_retriever()
|
63 |
global qa
|
|
|
74 |
|
75 |
|
76 |
|
|
|
77 |
def clear_data():
|
78 |
global qa, db
|
79 |
qa = None
|