wholewhale committed on
Commit
3e93b01
·
1 Parent(s): 1a0b1c4
Files changed (1) hide show
  1. app.py +6 -8
app.py CHANGED
@@ -1,32 +1,30 @@
1
- from summarizer import Summarizer
2
  import gradio as gr
3
  from gradio import state
4
  import os
5
  import time
6
  import threading
7
  from langchain.document_loaders import OnlinePDFLoader
 
8
  from langchain.llms import OpenAI
 
9
  from langchain.vectorstores import Chroma
10
  from langchain.chains import ConversationalRetrievalChain
11
 
12
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
13
- bert_model = Summarizer()
14
 
15
  # Declare session state for tracking last interaction time
16
  last_interaction_time = state.declare("last_interaction_time", 0)
17
 
18
  def loading_pdf():
19
- return "Working the upload..."
20
 
21
  def pdf_changes(pdf_doc):
22
  loader = OnlinePDFLoader(pdf_doc.name)
23
  documents = loader.load()
24
-
25
- # Summarize the text with BERT
26
- summarized_text = bert_model(documents)
27
-
28
  embeddings = OpenAIEmbeddings()
29
- db = Chroma.from_documents([summarized_text], embeddings)
30
  retriever = db.as_retriever()
31
  global qa
32
  qa = ConversationalRetrievalChain.from_llm(
 
 
1
  import gradio as gr
2
  from gradio import state
3
  import os
4
  import time
5
  import threading
6
  from langchain.document_loaders import OnlinePDFLoader
7
+ from langchain.text_splitter import CharacterTextSplitter
8
  from langchain.llms import OpenAI
9
+ from langchain.embeddings import OpenAIEmbeddings
10
  from langchain.vectorstores import Chroma
11
  from langchain.chains import ConversationalRetrievalChain
12
 
13
  os.environ['OPENAI_API_KEY'] = os.getenv("Your_API_Key")
 
14
 
15
  # Declare session state for tracking last interaction time
16
  last_interaction_time = state.declare("last_interaction_time", 0)
17
 
18
  def loading_pdf():
19
+ return "Working the upload. Also, pondering the usefulness of sporks..."
20
 
21
  def pdf_changes(pdf_doc):
22
  loader = OnlinePDFLoader(pdf_doc.name)
23
  documents = loader.load()
24
+ text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=50)
25
+ texts = text_splitter.split_documents(documents)
 
 
26
  embeddings = OpenAIEmbeddings()
27
+ db = Chroma.from_documents(texts, embeddings)
28
  retriever = db.as_retriever()
29
  global qa
30
  qa = ConversationalRetrievalChain.from_llm(