jchen8000 committed on
Commit
edb320d
·
verified ·
1 Parent(s): 1c3c456

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -9
app.py CHANGED
@@ -1,10 +1,14 @@
1
  import gradio as gr
2
- from langchain.document_loaders import PyPDFLoader
3
- from langchain.text_splitter import RecursiveCharacterTextSplitter
4
- from langchain.embeddings import OpenAIEmbeddings
5
  from langchain.vectorstores import FAISS
6
  from langchain.chains import RetrievalQA
7
- from langchain.llms import OpenAI
 
 
 
 
8
 
9
  # Initialize the FAISS vector store
10
  vector_store = None
@@ -16,15 +20,17 @@ def index_pdf(pdf):
16
  # Load the PDF
17
  loader = PyPDFLoader(pdf.name)
18
  documents = loader.load()
19
-
20
  # Split the documents into chunks
21
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
22
  texts = text_splitter.split_documents(documents)
23
-
24
- # Embed the chunks and store them in the vector store
25
- embeddings = OpenAIEmbeddings()
 
 
26
  vector_store = FAISS.from_documents(texts, embeddings)
27
-
28
  return "PDF indexed successfully!"
29
 
30
  # Function to handle chatbot queries
 
1
  import gradio as gr
2
+ from langchain_community.document_loaders import PyPDFLoader
3
+ from langchain_text_splitters import RecursiveCharacterTextSplitter
4
+ from langchain_huggingface import HuggingFaceEmbeddings
5
  from langchain.vectorstores import FAISS
6
  from langchain.chains import RetrievalQA
7
+ from langchain_groq import ChatGroq
8
+ from langchain_core.prompts import PromptTemplate
9
+ from langchain_core.output_parsers import StrOutputParser
10
+ from langchain_core.runnables import RunnablePassthrough
11
+
12
 
13
  # Initialize the FAISS vector store
14
  vector_store = None
 
20
  # Load the PDF
21
  loader = PyPDFLoader(pdf.name)
22
  documents = loader.load()
23
+
24
  # Split the documents into chunks
25
  text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
26
  texts = text_splitter.split_documents(documents)
27
+
28
+ # Embed the chunks
29
+ embeddings = HuggingFaceEmbeddings(model_name="bert-base-uncased", encode_kwargs={"normalize_embeddings": True})
30
+
31
+ # Store the embeddings in the vector store
32
  vector_store = FAISS.from_documents(texts, embeddings)
33
+
34
  return "PDF indexed successfully!"
35
 
36
  # Function to handle chatbot queries