Commit
·
9c04c52
1
Parent(s):
8db718c
Add per-chunk PDF summarization using the "stuff" summarize chain
Browse files
app.py
CHANGED
@@ -23,30 +23,49 @@ def summary(self):
|
|
23 |
avg_doc_length = sum(len(doc) for doc in self.documents) / num_documents
|
24 |
return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"
|
25 |
|
26 |
-
# PDF summary and query
|
27 |
def pdf_changes(pdf_doc):
    """Load a PDF and prepare a conversational retrieval QA chain over it.

    Parameters:
        pdf_doc: uploaded file object whose ``.name`` is a path/URL readable
            by ``OnlinePDFLoader``.

    Returns:
        A "Ready." status string on success, or an error message string if
        any step fails.

    Side effects:
        Rebinds the module-level globals ``db`` (Chroma vector store) and
        ``qa`` (ConversationalRetrievalChain).
    """
    try:
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)

        embeddings = OpenAIEmbeddings()
        global db
        db = Chroma.from_documents(texts, embeddings)

        retriever = db.as_retriever()
        global qa
        qa = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
            retriever=retriever,
            return_source_documents=False
        )

        # BUG FIX: the original returned f"Ready. {summary}", but no local
        # or module-level string named `summary` is in scope here (the only
        # visible `summary` is an instance method elsewhere), so the f-string
        # would interpolate a function repr or raise NameError. Return a
        # plain status instead.
        return "Ready."
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
|
48 |
|
49 |
|
|
|
50 |
def clear_data():
|
51 |
global qa, db
|
52 |
qa = None
|
|
|
23 |
avg_doc_length = sum(len(doc) for doc in self.documents) / num_documents
|
24 |
return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"
|
25 |
|
26 |
+
# PDF summary and query using stuffing
|
27 |
def pdf_changes(pdf_doc):
    """Load a PDF, summarize it chunk-by-chunk with a "stuff" chain, and
    prepare a conversational retrieval QA chain over it.

    Parameters:
        pdf_doc: uploaded file object whose ``.name`` is a path/URL readable
            by ``OnlinePDFLoader``.

    Returns:
        A status string containing the concatenated group summaries, or an
        error message string if any step fails.

    Side effects:
        Rebinds the module-level globals ``db`` (Chroma vector store) and
        ``qa`` (ConversationalRetrievalChain).
    """
    try:
        loader = OnlinePDFLoader(pdf_doc.name)
        documents = loader.load()
        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
        texts = text_splitter.split_documents(documents)

        # Hoisted out of the loop: the chain is invariant across groups,
        # so there is no reason to rebuild it on every iteration.
        stuff_chain = load_summarize_chain(vertex_llm_text, chain_type="stuff", prompt=prompt)

        full_summary = ""
        # Summarize the text in groups of 3 chunks at a time.
        # NOTE(review): these are CharacterTextSplitter chunks, not PDF
        # pages — the "pages" label below keeps the original wording.
        for i in range(0, len(texts), 3):
            group = texts[i:i + 3]

            # BUG FIX: `split_documents` returns Document objects, so the
            # original `" ".join(texts[i:i+3])` raised TypeError (join
            # requires str). A "stuff" chain accepts the documents
            # themselves, so pass the group directly.
            chunk_summary = stuff_chain.run(group)

            # BUG FIX: the original label said pages {i+1}-{i+3}, which
            # overshoots when the final group has fewer than 3 chunks;
            # clamp the upper bound to the actual group size.
            full_summary += f"Summary of pages {i + 1}-{i + len(group)}:\n{chunk_summary}\n"

        embeddings = OpenAIEmbeddings()
        global db
        db = Chroma.from_documents(texts, embeddings)

        retriever = db.as_retriever()
        global qa
        qa = ConversationalRetrievalChain.from_llm(
            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
            retriever=retriever,
            return_source_documents=False
        )

        return f"Ready. Full Summary:\n{full_summary}"
    except Exception as e:
        return f"Error processing PDF: {str(e)}"
|
66 |
|
67 |
|
68 |
+
|
69 |
def clear_data():
|
70 |
global qa, db
|
71 |
qa = None
|