wholewhale committed
Commit 9c04c52 · 1 Parent(s): 8db718c

summary stuffing

Files changed (1)
  1. app.py +26 -7
app.py CHANGED
@@ -23,30 +23,49 @@ def summary(self):
     avg_doc_length = sum(len(doc) for doc in self.documents) / num_documents
     return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"
 
-# PDF summary and query
+# PDF summary and query using stuffing
 def pdf_changes(pdf_doc):
     try:
         loader = OnlinePDFLoader(pdf_doc.name)
         documents = loader.load()
         text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
         texts = text_splitter.split_documents(documents)
+
+        # Initialize summary variable
+        full_summary = ""
+
+        # Divide the text into smaller chunks, for example 3 pages per chunk
+        for i in range(0, len(texts), 3):
+            chunk = " ".join(texts[i:i+3])
+
+            # Load the summarization chain with stuffing method
+            stuff_chain = load_summarize_chain(vertex_llm_text, chain_type="stuff", prompt=prompt)
+
+            # Generate summary for the chunk
+            chunk_summary = stuff_chain.run(chunk)
+
+            # Add the chunk summary to the full summary
+            full_summary += f"Summary of pages {i+1}-{i+3}:\n{chunk_summary}\n"
+
         embeddings = OpenAIEmbeddings()
         global db
         db = Chroma.from_documents(texts, embeddings)
-        summary = db.summary() # Assuming Chroma has a summary method
-
+
         retriever = db.as_retriever()
         global qa
         qa = ConversationalRetrievalChain.from_llm(
-            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
-            retriever=retriever,
-            return_source_documents=False)
+            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
+            retriever=retriever,
+            return_source_documents=False
+        )
+
+        return f"Ready. Full Summary:\n{full_summary}"
 
-        return f"Ready. {summary}" # Include the summary in the return message
     except Exception as e:
         return f"Error processing PDF: {str(e)}"
 
 
+
 def clear_data():
     global qa, db
     qa = None
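
The added summarization step calls load_summarize_chain(vertex_llm_text, chain_type="stuff", prompt=prompt), but neither vertex_llm_text nor prompt is defined in this hunk, and the classic LangChain "stuff" chain is normally run over a list of Document objects rather than a joined string. The sketch below shows that stuffing pattern under those assumptions, with an OpenAI LLM and a generic prompt standing in for the app's own vertex_llm_text and prompt; it is an illustration, not the commit's exact implementation.

# Minimal "stuff" summarization sketch (classic LangChain API).
# Assumptions: OpenAI stands in for the app's vertex_llm_text, and the
# prompt below stands in for the app's `prompt`, which this hunk never shows.
from langchain.chains.summarize import load_summarize_chain
from langchain.docstore.document import Document
from langchain.llms import OpenAI
from langchain.prompts import PromptTemplate

prompt = PromptTemplate(
    input_variables=["text"],
    template="Write a concise summary of the following:\n\n{text}\n\nCONCISE SUMMARY:",
)
llm = OpenAI(temperature=0.2)  # stand-in for vertex_llm_text

def summarize_documents(docs):
    # The stuff chain concatenates every document into a single prompt,
    # so each group of documents must fit in the model's context window.
    chain = load_summarize_chain(llm, chain_type="stuff", prompt=prompt)
    return chain.run(docs)  # expects a list of Document objects

# Group the split documents three at a time, as the commit does.
texts = [Document(page_content=f"text of chunk {n}") for n in range(7)]
full_summary = ""
for i in range(0, len(texts), 3):
    group = texts[i:i + 3]
    full_summary += f"Summary of chunks {i + 1}-{i + len(group)}:\n"
    full_summary += summarize_documents(group) + "\n"
print(full_summary)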
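
The hunk also rebuilds the global ConversationalRetrievalChain (qa) after indexing the split texts into Chroma. A caller such as a Gradio handler elsewhere in app.py would query it with a question plus the running chat history; the helper below is a hypothetical illustration of that call pattern and is not part of this commit.

# Hypothetical helper; the real Gradio callback lives outside this hunk.
from langchain.chains import ConversationalRetrievalChain

def answer_question(qa_chain: ConversationalRetrievalChain, question: str, chat_history: list) -> str:
    # Classic LangChain call style: pass the question and prior (question, answer) pairs.
    result = qa_chain({"question": question, "chat_history": chat_history})
    return result["answer"]

# Usage, assuming pdf_changes() has already populated the global `qa`:
# history = []
# answer = answer_question(qa, "What is this PDF about?", history)
# history.append(("What is this PDF about?", answer))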