wholewhale committed on
Commit
2b90e18
·
1 Parent(s): 6360179
Files changed (1) hide show
  1. app.py +9 -32
app.py CHANGED
@@ -35,49 +35,26 @@ summary_state = gr.State(initial_value="pending")
35
  # PDF summary and query using stuffing
36
  def pdf_changes(pdf_doc):
37
  try:
38
- # Initialize loader and load documents
 
 
39
  loader = OnlinePDFLoader(pdf_doc.name)
40
  documents = loader.load()
41
-
42
- # Define the prompt for summarization
43
- prompt_template = """Write a concise summary of the following:
44
- "{text}"
45
- CONCISE SUMMARY:"""
46
- prompt = PromptTemplate.from_template(prompt_template)
47
-
48
- # Define the LLM chain with the specified prompt
49
- llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
50
- llm_chain = LLMChain(llm=llm, prompt=prompt)
51
-
52
- # Initialize StuffDocumentsChain
53
- stuff_chain = StuffDocumentsChain(
54
- llm_chain=llm_chain, document_variable_name="text"
55
- )
56
-
57
- # Generate summary using StuffDocumentsChain
58
- global full_summary
59
- full_summary = stuff_chain.run(documents)
60
- # Update the state variable
61
- return {summary_state: full_summary}
62
-
63
- # Other existing logic for Chroma, embeddings, and retrieval
64
  embeddings = OpenAIEmbeddings()
65
  global db
66
- db = Chroma.from_documents(documents, embeddings)
67
-
68
  retriever = db.as_retriever()
69
  global qa
70
  qa = ConversationalRetrievalChain.from_llm(
71
- llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo-16k", max_tokens=-1, n=2),
72
  retriever=retriever,
73
  return_source_documents=False
74
  )
75
- summary_box.set_value(full_summary)
76
- return f"Ready. Full Summary loaded."
77
-
78
  except Exception as e:
79
- return f"Error processing PDF: {str(e)}"
80
-
81
 
82
 
83
  def clear_data():
 
35
  # PDF summary and query using stuffing
36
  def pdf_changes(pdf_doc):
37
  try:
38
+ if pdf_doc is None:
39
+ return "No PDF uploaded."
40
+
41
  loader = OnlinePDFLoader(pdf_doc.name)
42
  documents = loader.load()
43
+ text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
44
+ texts = text_splitter.split_documents(documents)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  embeddings = OpenAIEmbeddings()
46
  global db
47
+ db = Chroma.from_documents(texts, embeddings)
 
48
  retriever = db.as_retriever()
49
  global qa
50
  qa = ConversationalRetrievalChain.from_llm(
51
+ llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
52
  retriever=retriever,
53
  return_source_documents=False
54
  )
55
+ return "Ready"
 
 
56
  except Exception as e:
57
+ return f"Error loading PDF: {e}"
 
58
 
59
 
60
  def clear_data():