wholewhale committed on
Commit
a2de0a4
·
1 Parent(s): 731dcdf

simple stuffing

Browse files
Files changed (1) hide show
  1. app.py +3 -18
app.py CHANGED
@@ -36,10 +36,6 @@ def pdf_changes(pdf_doc):
36
  loader = OnlinePDFLoader(pdf_doc.name)
37
  documents = loader.load()
38
 
39
- # Split loaded documents into chunks
40
- text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
41
- texts = text_splitter.split_documents(documents)
42
-
43
  # Define the prompt for summarization
44
  prompt_template = """Write a concise summary of the following:
45
  "{text}"
@@ -55,23 +51,13 @@ def pdf_changes(pdf_doc):
55
  llm_chain=llm_chain, document_variable_name="text"
56
  )
57
 
58
- # Initialize summary variable
59
- full_summary = ""
60
-
61
- # Iterate through text chunks to summarize
62
- for i in range(0, len(texts), 2):
63
- chunk = " ".join([doc.page_content for doc in texts[i:i + 2]])
64
-
65
- # Generate summary using StuffDocumentsChain
66
- chunk_summary = stuff_chain.run([chunk])
67
-
68
- # Add chunk summary to full summary
69
- full_summary += f"Summary of pages {i+1}-{i+3}:\n{chunk_summary}\n"
70
 
71
  # Other existing logic for Chroma, embeddings, and retrieval
72
  embeddings = OpenAIEmbeddings()
73
  global db
74
- db = Chroma.from_documents(texts, embeddings)
75
 
76
  retriever = db.as_retriever()
77
  global qa
@@ -88,7 +74,6 @@ def pdf_changes(pdf_doc):
88
 
89
 
90
 
91
-
92
  def clear_data():
93
  global qa, db
94
  qa = None
 
36
  loader = OnlinePDFLoader(pdf_doc.name)
37
  documents = loader.load()
38
 
 
 
 
 
39
  # Define the prompt for summarization
40
  prompt_template = """Write a concise summary of the following:
41
  "{text}"
 
51
  llm_chain=llm_chain, document_variable_name="text"
52
  )
53
 
54
+ # Generate summary using StuffDocumentsChain
55
+ full_summary = stuff_chain.run(documents)
 
 
 
 
 
 
 
 
 
 
56
 
57
  # Other existing logic for Chroma, embeddings, and retrieval
58
  embeddings = OpenAIEmbeddings()
59
  global db
60
+ db = Chroma.from_documents(documents, embeddings)
61
 
62
  retriever = db.as_retriever()
63
  global qa
 
74
 
75
 
76
 
 
77
  def clear_data():
78
  global qa, db
79
  qa = None