wholewhale committed on
Commit
731dcdf
·
1 Parent(s): f340ee6
Files changed (1) hide show
  1. app.py +40 -19
app.py CHANGED
@@ -8,6 +8,12 @@ from langchain.llms import OpenAI
8
  from langchain.embeddings import OpenAIEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import ConversationalRetrievalChain
 
 
 
 
 
 
11
 
12
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
13
 
@@ -25,49 +31,64 @@ def summary(self):
25
 
26
  # PDF summary and query using stuffing
27
  def pdf_changes(pdf_doc):
28
- try:
 
29
  loader = OnlinePDFLoader(pdf_doc.name)
30
  documents = loader.load()
 
 
31
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
32
  texts = text_splitter.split_documents(documents)
33
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  # Initialize summary variable
35
  full_summary = ""
36
-
37
- # Divide the text into smaller chunks, for example, 2 pages per chunk
38
  for i in range(0, len(texts), 2):
39
- chunk = " ".join([doc.page_content for doc in texts[i:i+2]]) # Replace '.content' with the correct attribute
40
-
41
-
42
-
43
- # Load the summarization chain with stuffing method
44
- stuff_chain = load_summarize_chain(vertex_llm_text, chain_type="stuff", prompt=prompt)
45
-
46
- # Generate summary for the chunk
47
- chunk_summary = stuff_chain.run(chunk)
48
 
49
- # Add the chunk summary to the full summary
50
  full_summary += f"Summary of pages {i+1}-{i+3}:\n{chunk_summary}\n"
51
-
 
52
  embeddings = OpenAIEmbeddings()
53
  global db
54
  db = Chroma.from_documents(texts, embeddings)
55
-
56
  retriever = db.as_retriever()
57
- global qa
58
  qa = ConversationalRetrievalChain.from_llm(
59
  llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
60
  retriever=retriever,
61
  return_source_documents=False
62
  )
63
-
64
  return f"Ready. Full Summary:\n{full_summary}"
65
-
66
  except Exception as e:
67
  return f"Error processing PDF: {str(e)}"
68
 
69
 
70
 
 
71
  def clear_data():
72
  global qa, db
73
  qa = None
 
8
  from langchain.embeddings import OpenAIEmbeddings
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import ConversationalRetrievalChain
11
+ from langchain.chat_models import ChatOpenAI
12
+ from langchain.document_loaders import WebBaseLoader
13
+ from langchain.chains.summarize import load_summarize_chain
14
+ from langchain.chains.llm import LLMChain
15
+ from langchain.prompts import PromptTemplate
16
+ from langchain.chains.combine_documents.stuff import StuffDocumentsChain
17
 
18
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
19
 
 
31
 
32
  # PDF summary and query using stuffing
33
  def pdf_changes(pdf_doc):
34
+ try:
35
+ # Initialize loader and load documents
36
  loader = OnlinePDFLoader(pdf_doc.name)
37
  documents = loader.load()
38
+
39
+ # Split loaded documents into chunks
40
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
41
  texts = text_splitter.split_documents(documents)
42
+
43
+ # Define the prompt for summarization
44
+ prompt_template = """Write a concise summary of the following:
45
+ "{text}"
46
+ CONCISE SUMMARY:"""
47
+ prompt = PromptTemplate.from_template(prompt_template)
48
+
49
+ # Define the LLM chain with the specified prompt
50
+ llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")
51
+ llm_chain = LLMChain(llm=llm, prompt=prompt)
52
+
53
+ # Initialize StuffDocumentsChain
54
+ stuff_chain = StuffDocumentsChain(
55
+ llm_chain=llm_chain, document_variable_name="text"
56
+ )
57
+
58
  # Initialize summary variable
59
  full_summary = ""
60
+
61
+ # Iterate through text chunks to summarize
62
  for i in range(0, len(texts), 2):
63
+ chunk = " ".join([doc.page_content for doc in texts[i:i + 2]])
64
+
65
+ # Generate summary using StuffDocumentsChain
66
+ chunk_summary = stuff_chain.run([chunk])
 
 
 
 
 
67
 
68
+ # Add chunk summary to full summary
69
  full_summary += f"Summary of pages {i+1}-{i+3}:\n{chunk_summary}\n"
70
+
71
+ # Other existing logic for Chroma, embeddings, and retrieval
72
  embeddings = OpenAIEmbeddings()
73
  global db
74
  db = Chroma.from_documents(texts, embeddings)
75
+
76
  retriever = db.as_retriever()
77
+ global qa
78
  qa = ConversationalRetrievalChain.from_llm(
79
  llm=OpenAI(temperature=0.2, model_name="gpt-3.5-turbo", max_tokens=-1, n=2),
80
  retriever=retriever,
81
  return_source_documents=False
82
  )
83
+
84
  return f"Ready. Full Summary:\n{full_summary}"
85
+
86
  except Exception as e:
87
  return f"Error processing PDF: {str(e)}"
88
 
89
 
90
 
91
+
92
  def clear_data():
93
  global qa, db
94
  qa = None