# Spaces: Sleeping
import io
import os

import faiss
import numpy as np
import PyPDF2
import streamlit as st
import torch
from gtts import gTTS
from transformers import AutoTokenizer, AutoModel
# Initialize the model and tokenizer
model_name = "sentence-transformers/all-MiniLM-L6-v2"


@st.cache_resource
def _load_encoder(name):
    """Return the ``(tokenizer, model)`` pair for the checkpoint *name*.

    Streamlit re-executes this script on every widget interaction; caching
    the loaded objects avoids re-reading the checkpoint on each rerun.
    """
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


tokenizer, model = _load_encoder(model_name)
# Function to get embeddings
def get_embedding(text):
    """Embed *text* with the module-level ``tokenizer`` and ``model``.

    Args:
        text: A string, or a list of strings to embed as one padded batch.

    Returns:
        A C-contiguous ``float32`` NumPy array with one row per input text.
        Each row is the mean of the encoder's last hidden state over the
        real (non-padding) tokens of that text.
    """
    # Cap the token count at the encoder's position limit: BERT-style
    # encoders cannot run a sequence longer than their position table, so
    # an un-truncated long document would fail in the forward pass.
    max_length = getattr(model.config, "max_position_embeddings", 512)
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=max_length,
    )

    with torch.no_grad():
        token_states = model(**inputs).last_hidden_state

    # Average only over real tokens. For a single un-padded text the mask is
    # all ones, so this equals a plain mean over the sequence dimension.
    mask = inputs["attention_mask"].unsqueeze(-1).to(token_states.dtype)
    pooled = (token_states * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1e-9)

    # FAISS expects contiguous float32 input.
    return np.ascontiguousarray(pooled.numpy(), dtype=np.float32)
# Initialize FAISS index
# Read the vector width from the loaded encoder (384 for MiniLM) so the index
# stays in sync with the model if ``model_name`` is ever changed; fall back
# to 384 when the config does not expose ``hidden_size``.
embeddings_dimension = getattr(model.config, "hidden_size", 384)
index = faiss.IndexFlatL2(embeddings_dimension)
# Title of the app
st.title("Study Assistant for Grade 9")

# File uploader widget
uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])

# Keep ``embeddings`` bound even when nothing has been uploaded or embedded,
# so later code that inspects it does not hit a NameError.
embeddings = np.empty((0, embeddings_dimension), dtype=np.float32)

if uploaded_file is not None:
    # Read the uploaded PDF file
    pdf_reader = PyPDF2.PdfReader(uploaded_file)

    # Extract text from each page. Each page is extracted exactly once, and
    # pages that yield no text contribute an empty string.
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    st.subheader("Extracted Text:")
    st.write(text)

    if text.strip():
        # Generate embedding for the extracted text. The whole document is
        # embedded as a single input, so the index receives one vector.
        embeddings = get_embedding(text)
        index.add(embeddings)  # Add embedding to the FAISS index
        st.success("Text extracted and embeddings generated!")
    else:
        # Nothing to embed (e.g. a PDF that contains only images); do not
        # add a vector for an empty string to the index.
        st.warning("No extractable text was found in this PDF.")
# Subject selection and query input
subject = st.selectbox("Select Subject", ["Math", "Science", "English"])
query = st.text_input("Type your query")

if st.button("Submit"):
    if not query:
        st.warning("Please type a query first.")
    else:
        # Get embedding for the query
        query_embedding = get_embedding(query)

        # Search for the nearest neighbors in the FAISS index
        D, I = index.search(query_embedding, k=5)  # Retrieve top 5 matches

        st.subheader("Top Matches:")
        # D[0] and I[0] are parallel arrays ordered by rank, so pair them up
        # rather than indexing the distances by vector id. FAISS fills the
        # unused result slots with id -1 when the index holds fewer than k
        # vectors (including when it is empty); skip those.
        matches = [(idx, dist) for dist, idx in zip(D[0], I[0]) if idx >= 0]
        for idx, dist in matches:
            st.write(f"Match Index: {idx}, Distance: {dist}")  # Display match details
        if not matches:
            st.info("No matches found. Upload a PDF to search against.")

        # Convert response to speech
        response_text = f"You asked about '{query}' in {subject}. Here are your top matches."
        tts = gTTS(text=response_text, lang='en')

        # Render the MP3 into memory instead of a fixed "response.mp3" path,
        # which every concurrent session of the app would overwrite.
        audio_buffer = io.BytesIO()
        tts.write_to_fp(audio_buffer)

        # Display audio controls
        st.audio(audio_buffer.getvalue(), format="audio/mp3")
        st.success("Response generated!")