wholewhale committed
Commit 8db718c · 1 parent: 211d0af

auto summary

Files changed (1): app.py (+30, -16)
app.py CHANGED
@@ -17,21 +17,35 @@ last_interaction_time = 0
 def loading_pdf():
     return "Working on the upload. Also, pondering the usefulness of sporks..."
 
+# Inside Chroma mod
+def summary(self):
+    num_documents = len(self.documents)
+    avg_doc_length = sum(len(doc) for doc in self.documents) / num_documents
+    return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"
+
+# PDF summary and query
 def pdf_changes(pdf_doc):
-    loader = OnlinePDFLoader(pdf_doc.name)
-    documents = loader.load()
-    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
-    texts = text_splitter.split_documents(documents)
-    embeddings = OpenAIEmbeddings()
-    global db
-    db = Chroma.from_documents(texts, embeddings)
-    retriever = db.as_retriever()
-    global qa
-    qa = ConversationalRetrievalChain.from_llm(
-        llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
-        retriever=retriever,
-        return_source_documents=False)
-    return "Ready"
+    try:
+        loader = OnlinePDFLoader(pdf_doc.name)
+        documents = loader.load()
+        text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
+        texts = text_splitter.split_documents(documents)
+        embeddings = OpenAIEmbeddings()
+        global db
+        db = Chroma.from_documents(texts, embeddings)
+        summary = db.summary()  # Assuming Chroma has a summary method
+
+        retriever = db.as_retriever()
+        global qa
+        qa = ConversationalRetrievalChain.from_llm(
+            llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
+            retriever=retriever,
+            return_source_documents=False)
+
+        return f"Ready. {summary}"  # Include the summary in the return message
+    except Exception as e:
+        return f"Error processing PDF: {str(e)}"
+
 
 def clear_data():
     global qa, db
@@ -68,14 +82,14 @@ def infer(question, history):
 
 def auto_clear_data():
     global qa, da, last_interaction_time
-    if time.time() - last_interaction_time > 600:
+    if time.time() - last_interaction_time > 1000:
         qa = None
         db = None
 
 def periodic_clear():
     while True:
         auto_clear_data()
-        time.sleep(60)
+        time.sleep(1000)
 
 threading.Thread(target=periodic_clear).start()
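
Note on the added summary path: the new summary(self) helper is labeled "Inside Chroma mod", and pdf_changes calls db.summary() with a comment that this assumes Chroma has such a method. The stock langchain Chroma vector store exposes neither a summary() method nor a documents attribute, and len(doc) on a langchain Document would raise rather than return a text length, so unless that Chroma modification is applied separately the call fails and the except branch returns "Error processing PDF: ..." instead of "Ready". Below is a minimal sketch that produces the same figures from the chunks the text splitter already returns, so no Chroma patch is needed; summarize_texts is an illustrative name, not part of this repo or of langchain.

# Hypothetical helper: same statistics as the patched summary(), computed from
# the split chunks that pdf_changes already has in scope.
def summarize_texts(texts):
    num_documents = len(texts)
    avg_doc_length = sum(len(t.page_content) for t in texts) / num_documents
    return f"Number of documents: {num_documents}, Average document length: {avg_doc_length}"

# Inside pdf_changes, after split_documents():
#     summary = summarize_texts(texts)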
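
Note on the idle-cleanup hunk: both sides declare global qa, da, which looks like a typo for db; because db is not declared global in auto_clear_data, the db = None assignment binds a local name and the global vector store is never actually released. The sketch below assumes that reading is correct and that last_interaction_time is updated elsewhere (for example in infer, which is not shown in full here); it keeps the timings from this commit, relies on the time and threading imports app.py already uses, and additionally marks the worker as a daemon thread so it does not keep the process alive on shutdown.

# Sketch with the apparent `da` typo corrected and a daemon worker thread.
def auto_clear_data():
    global qa, db
    if time.time() - last_interaction_time > 1000:  # same idle window as this commit
        qa = None
        db = None

def periodic_clear():
    while True:
        auto_clear_data()
        time.sleep(1000)

threading.Thread(target=periodic_clear, daemon=True).start()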