Shahabmoin committed on
Commit
45975ce
·
verified ·
1 Parent(s): 3d6b54f

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +78 -0
app.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import PyPDF2
3
+ import faiss
4
+ import streamlit as st
5
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
6
+ from langchain.embeddings import HuggingFaceEmbeddings
7
+ from langchain.vectorstores import FAISS
8
+ from groq import Groq
9
+
10
# Initialize the Groq API client.
# NOTE(security): the original code passed the literal secret key string as
# the *name* of an environment variable, so os.environ.get(...) always
# returned None (the client could never authenticate) AND the secret was
# committed to source control. Read the key from a conventionally-named
# environment variable instead; the previously committed key must be revoked.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
12
+
13
# Function to extract text from PDF
def extract_text_from_pdf(pdf_path):
    """Extract the concatenated text of every page of a PDF file.

    Args:
        pdf_path: Filesystem path of the PDF to read.

    Returns:
        A single string with the text of all pages joined in order.
        Pages from which PyPDF2 cannot extract text contribute "".
    """
    with open(pdf_path, "rb") as file:
        reader = PyPDF2.PdfReader(file)
        # extract_text() may return None (e.g. image-only pages); coerce to
        # "" so concatenation never raises. "".join avoids quadratic `+=`.
        return "".join(page.extract_text() or "" for page in reader.pages)
21
+
22
# Function to create chunks and embeddings using LangChain
def process_text_with_langchain(text):
    """Split *text* into overlapping chunks and index them with FAISS.

    Args:
        text: Raw document text to chunk and embed.

    Returns:
        A ``(vectorstore, chunks)`` pair: the FAISS vector store built
        over the chunk embeddings, and the raw chunk strings themselves.
    """
    # 500-character windows with a 50-character overlap between neighbours.
    splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    pieces = splitter.split_text(text)

    # Embed each chunk with the default HuggingFace model and build a
    # FAISS similarity index over the resulting vectors.
    store = FAISS.from_texts(pieces, HuggingFaceEmbeddings())
    return store, pieces
35
+
36
# Function to query FAISS index
def query_faiss_index(query, vectorstore):
    """Return the page text of the 3 chunks most similar to *query*."""
    return [hit.page_content for hit in vectorstore.similarity_search(query, k=3)]
41
+
42
# Function to interact with Groq LLM
def ask_groq(query):
    """Send *query* to the Groq llama3-8b-8192 model and return its reply text."""
    response = client.chat.completions.create(
        messages=[{"role": "user", "content": query}],
        model="llama3-8b-8192",
        stream=False,
    )
    # Single (non-streamed) completion: take the first choice's message body.
    return response.choices[0].message.content
55
+
56
# Streamlit app
st.title("RAG-Based Chatbot")

uploaded_file = st.file_uploader("Upload a PDF file", type="pdf")
if uploaded_file is not None:
    # Streamlit reruns this entire script on every widget interaction, so
    # without caching the PDF would be re-extracted and re-embedded on every
    # question. Cache the vector store in session_state, keyed on the
    # uploaded file's name, and rebuild only when a different file arrives.
    if st.session_state.get("indexed_file") != uploaded_file.name:
        with open("uploaded_file.pdf", "wb") as f:
            f.write(uploaded_file.read())

        st.info("Processing the PDF...")
        text = extract_text_from_pdf("uploaded_file.pdf")
        vectorstore, chunks = process_text_with_langchain(text)
        st.session_state["vectorstore"] = vectorstore
        st.session_state["indexed_file"] = uploaded_file.name
        st.success("PDF processed and indexed successfully!")

    vectorstore = st.session_state["vectorstore"]

    query = st.text_input("Ask a question about the document")
    if query:
        st.info("Searching relevant chunks...")
        relevant_chunks = query_faiss_index(query, vectorstore)
        context = "\n".join(relevant_chunks)

        # Ground the LLM answer in the retrieved chunks (basic RAG prompt).
        st.info("Getting response from the language model...")
        response = ask_groq(f"Context: {context}\n\nQuestion: {query}")
        st.success(response)