Spaces:
Sleeping
Sleeping
File size: 6,704 Bytes
7b65368 f9d8be7 34a43e5 f9d8be7 7b65368 f9d8be7 7b65368 f9d8be7 7b65368 f9d8be7 7b65368 f9d8be7 34a43e5 f9d8be7 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 |
# import os
# from groq import Groq
# from langchain_community.embeddings import HuggingFaceEmbeddings
# from langchain_community.vectorstores import FAISS
# from langchain.text_splitter import RecursiveCharacterTextSplitter
# from PyPDF2 import PdfReader
# import streamlit as st
# from tempfile import NamedTemporaryFile
# # Initialize Groq client
# client = Groq(api_key=os.getenv("Groq_api_key"))
# # client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
# # Function to extract text from a PDF
# def extract_text_from_pdf(pdf_file_path):
# pdf_reader = PdfReader(pdf_file_path)
# text = ""
# for page in pdf_reader.pages:
# text += page.extract_text()
# return text
# # Function to split text into chunks
# def chunk_text(text, chunk_size=500, chunk_overlap=50):
# text_splitter = RecursiveCharacterTextSplitter(
# chunk_size=chunk_size, chunk_overlap=chunk_overlap
# )
# return text_splitter.split_text(text)
# # Function to create embeddings and store them in FAISS
# def create_embeddings_and_store(chunks):
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# vector_db = FAISS.from_texts(chunks, embedding=embeddings)
# return vector_db
# # Function to query the vector database and interact with Groq
# def query_vector_db(query, vector_db):
# # Retrieve relevant documents
# docs = vector_db.similarity_search(query, k=3)
# context = "\n".join([doc.page_content for doc in docs])
# # Interact with Groq API
# chat_completion = client.chat.completions.create(
# messages=[
# {"role": "system", "content": f"Use the following context:\n{context}"},
# {"role": "user", "content": query},
# ],
# model="llama3-8b-8192",
# )
# return chat_completion.choices[0].message.content
# # Streamlit app
# st.title("Interactive PDF Reader and Chat")
# # Upload PDF
# uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])
# if uploaded_file:
# with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
# temp_file.write(uploaded_file.read())
# pdf_path = temp_file.name
# # Extract text, chunk it, and create embeddings
# text = extract_text_from_pdf(pdf_path)
# chunks = chunk_text(text)
# vector_db = create_embeddings_and_store(chunks)
# # State management for the chat
# if "chat_history" not in st.session_state:
# st.session_state.chat_history = []
# # Display chat history
# for i, chat in enumerate(st.session_state.chat_history):
# st.write(f"**Query {i+1}:** {chat['query']}")
# st.write(f"**Response:** {chat['response']}")
# st.write("---")
# # Add new query input dynamically
# if "query_count" not in st.session_state:
# st.session_state.query_count = 1
# query_key = f"query_{st.session_state.query_count}"
# user_query = st.text_input(f"Enter Query {st.session_state.query_count}:", key=query_key)
# if user_query:
# # Generate response
# response = query_vector_db(user_query, vector_db)
# # Append query and response to the chat history
# st.session_state.chat_history.append({"query": user_query, "response": response})
# # Increment query count for the next input box
# st.session_state.query_count += 1
# # Rerun to show the updated UI
# st.experimental_rerun()
import os
from groq import Groq
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from PyPDF2 import PdfReader
import streamlit as st
from tempfile import NamedTemporaryFile
# Initialize Groq client
# The API key comes from the "Groq_api_key" environment variable; a missing
# variable is passed through to the Groq constructor as None.
client = Groq(api_key=os.environ.get("Groq_api_key"))
# Function to extract text from a PDF
def extract_text_from_pdf(pdf_file_path):
    """Return the concatenated text of every page in a PDF.

    Args:
        pdf_file_path: Location of the PDF, passed straight to ``PdfReader``.

    Returns:
        One string holding the extracted text of all pages in page order.
        A page that yields no text contributes an empty string.
    """
    pdf_reader = PdfReader(pdf_file_path)
    # Join once instead of growing a string with ``+=`` per page. The
    # ``or ""`` guard keeps a page without extractable text (for example a
    # scanned image) from breaking the concatenation.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# Function to split text into chunks
def chunk_text(text, chunk_size=500, chunk_overlap=50):
    """Split ``text`` into overlapping chunks.

    Args:
        text: The string to split.
        chunk_size: Forwarded to the splitter as ``chunk_size`` (default 500).
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``
            (default 50).

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_text`` returns for
        ``text`` with the given settings.
    """
    splitter_options = {
        "chunk_size": chunk_size,
        "chunk_overlap": chunk_overlap,
    }
    return RecursiveCharacterTextSplitter(**splitter_options).split_text(text)
# Function to create embeddings and store them in FAISS
def create_embeddings_and_store(chunks):
    """Embed ``chunks`` and index them in a FAISS vector store.

    Args:
        chunks: Texts to embed, passed to ``FAISS.from_texts``.

    Returns:
        The vector store produced by ``FAISS.from_texts`` using the
        ``sentence-transformers/all-MiniLM-L6-v2`` embedding model.
    """
    embedding_model = HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )
    return FAISS.from_texts(chunks, embedding=embedding_model)
# Function to query the vector database and interact with Groq
def query_vector_db(query, vector_db):
    """Answer ``query`` using context retrieved from ``vector_db``.

    Args:
        query: The user's question; used for retrieval and as the user message.
        vector_db: Object exposing ``similarity_search(query, k=...)`` whose
            results carry a ``page_content`` attribute.

    Returns:
        The message content of the first choice in the Groq chat completion.
    """
    # Retrieve the three closest documents and merge them into one context.
    matches = vector_db.similarity_search(query, k=3)
    context = "\n".join(match.page_content for match in matches)

    # The context goes in the system prompt; the question is the user turn.
    system_message = {
        "role": "system",
        "content": f"Use the following context:\n{context}",
    }
    user_message = {"role": "user", "content": query}

    completion = client.chat.completions.create(
        messages=[system_message, user_message],
        model="llama3-8b-8192",
    )
    return completion.choices[0].message.content
# Streamlit app
def _build_vector_db(uploaded_pdf):
    """Index an uploaded PDF; return its vector store, or None if it has no text."""
    # The text extractor is given a path, so spill the upload to a temp file.
    with NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
        temp_file.write(uploaded_pdf.getvalue())
        pdf_path = temp_file.name
    try:
        text = extract_text_from_pdf(pdf_path)
    finally:
        # delete=False keeps the file after the ``with`` block, so remove it
        # explicitly instead of leaking one file per upload.
        os.unlink(pdf_path)

    chunks = chunk_text(text)
    if not chunks:
        # Nothing to embed (for example an image-only PDF).
        return None
    return create_embeddings_and_store(chunks)


st.title("Interactive PDF Reader and Chat")

# Upload PDF
uploaded_file = st.file_uploader("Upload a PDF document", type=["pdf"])

if uploaded_file:
    # Build the index once per document: key the cached store on the upload's
    # name and size so that choosing a different PDF rebuilds it instead of
    # silently answering from the previous document.
    document_id = (uploaded_file.name, uploaded_file.size)
    if st.session_state.get("vector_db_source") != document_id:
        st.session_state.vector_db = _build_vector_db(uploaded_file)
        st.session_state.vector_db_source = document_id

    if st.session_state.vector_db is None:
        st.error("No extractable text was found in this PDF.")
        st.stop()

    # Initialize chat history if not already done
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = []

    # Display chat history
    for i, chat in enumerate(st.session_state.chat_history):
        st.write(f"**Query {i+1}:** {chat['query']}")
        st.write(f"**Response:** {chat['response']}")
        st.write("---")

    # The widget key changes with the history length, so each answered query
    # is followed by a fresh, empty input box.
    query_key = f"query_{len(st.session_state.chat_history) + 1}"
    user_query = st.text_input("Enter your query:", key=query_key)

    if user_query:
        # Generate response
        response = query_vector_db(user_query, st.session_state.vector_db)

        # Append query and response to the chat history
        st.session_state.chat_history.append(
            {"query": user_query, "response": response}
        )

        # Record the current history length in the URL query string.
        st.query_params["chat_length"] = len(st.session_state.chat_history)

        # The history was rendered before this exchange was appended; rerun
        # so the new query/response pair is displayed right away.
        st.rerun()
|