Khd-B committed
Commit c7a62b9 · verified · 1 parent: 1e83790

Update app.py

Files changed (1)
  1. app.py +41 -60
app.py CHANGED
@@ -1,76 +1,57 @@
+import pdfplumber
+from sentence_transformers import SentenceTransformer
 import streamlit as st
-import PyPDF2
-from transformers import AutoTokenizer, AutoModel
-import torch
-import numpy as np
-import faiss
 from gtts import gTTS
 import os
+from sklearn.metrics.pairwise import cosine_similarity
+# Function to extract text from a PDF
+def extract_text_from_pdf(pdf_path):
+    text = ""
+    with pdfplumber.open(pdf_path) as pdf:
+        for page in pdf.pages:
+            text += page.extract_text() + "\n"
+    return text

-# Initialize the model and tokenizer
-model_name = "sentence-transformers/all-MiniLM-L6-v2"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
-model = AutoModel.from_pretrained(model_name)
-
-# Function to get embeddings
-def get_embedding(text):
-    inputs = tokenizer(text, return_tensors='pt')
-    with torch.no_grad():
-        embeddings = model(**inputs).last_hidden_state.mean(dim=1).numpy()
-    return embeddings
-
-# Initialize FAISS index
-embeddings_dimension = 384 # for MiniLM
-index = faiss.IndexFlatL2(embeddings_dimension)
-
-# Title of the app
-st.title("Study Assistant for Grade 9")
-
-# File uploader widget
-uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])
+# Load your PDF file (upload it in Colab)
+pdf_path = "/content/Accounting.pdf" # Change this to your uploaded PDF file path
+pdf_text = extract_text_from_pdf(pdf_path)

-if uploaded_file is not None:
-    # Read the uploaded PDF file
-    pdf_reader = PyPDF2.PdfReader(uploaded_file)
-    text = ""
+# Create embeddings from the PDF text
+model = SentenceTransformer('all-MiniLM-L6-v2')
+pdf_sentences = pdf_text.split('. ') # Split text into sentences for embedding
+pdf_embeddings = model.encode(pdf_sentences, convert_to_tensor=True)

-    # Extract text from each page
-    for page in pdf_reader.pages:
-        text += page.extract_text() if page.extract_text() else ""
+# Function to respond to user query
+def respond_to_query(query):
+    query_embedding = model.encode(query, convert_to_tensor=True)

-    st.subheader("Extracted Text:")
-    st.write(text)
+    # Find the closest sentence based on cosine similarity
+    from sklearn.metrics.pairwise import cosine_similarity
+    similarities = cosine_similarity(query_embedding.reshape(1, -1), pdf_embeddings)
+    best_match_index = similarities.argmax()

-    # Generate embedding for the extracted text
-    embeddings = get_embedding(text)
-    index.add(embeddings) # Add embedding to the FAISS index
+    response = pdf_sentences[best_match_index]
+    return response

-    st.success("Text extracted and embeddings generated!")
+# Streamlit app
+st.title("Study Assistant")

-# Subject selection and query input
-subject = st.selectbox("Select Subject", ["Accounting"])
-query = st.text_input("Type your query")
+query = st.text_input("Type your question:")
+submit_button = st.button("Ask")

-if st.button("Submit"):
+if submit_button:
     if query:
-        # Get embedding for the query
-        query_embedding = get_embedding(query)
-
-        # Search for the nearest neighbors in the FAISS index
-        D, I = index.search(query_embedding, k=5) # Retrieve top 5 matches
-
-        st.subheader("Top Matches:")
-        for idx in I[0]:
-            if idx < len(embeddings): # Ensure index is valid
-                st.write(f"Match Index: {idx}, Distance: {D[0][idx]}") # Display match details
+        response = respond_to_query(query)

-        # Convert response to speech
-        response_text = f"You asked about '{query}' in {subject}. Here are your top matches."
-        tts = gTTS(text=response_text, lang='en')
+        # Text-to-Speech
+        tts = gTTS(response)
         tts.save("response.mp3")

-        # Display audio controls (Streamlit doesn't support direct playback)
-        st.audio("response.mp3")
-
-        st.success("Response generated!")
+        # Playing audio
+        os.system("mpg321 response.mp3") # Ensure mpg321 is installed in the Colab environment

+        st.write(response)
+    else:
+        st.write("Please enter a question.")
+# Run the Streamlit app and expose it
+!streamlit run app.py & npx localtunnel --port 8501
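
In the new extraction loop, `page.extract_text() + "\n"` raises a TypeError whenever pdfplumber returns None for a page (scanned or image-only pages), a case the removed PyPDF2 loop guarded against, and the hard-coded `/content/Accounting.pdf` path drops the old `st.file_uploader` flow even though `pdfplumber.open` also accepts file-like objects. A minimal sketch that keeps the uploader and skips empty pages (an illustration, not part of the commit):

import pdfplumber
import streamlit as st

# Sketch only: keep the original upload flow and skip pages with no extractable text.
uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])
if uploaded_file is not None:
    text = ""
    with pdfplumber.open(uploaded_file) as pdf:  # pdfplumber accepts file-like objects as well as paths
        for page in pdf.pages:
            page_text = page.extract_text()
            if page_text:
                text += page_text + "\n"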
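
`respond_to_query` encodes with `convert_to_tensor=True` and then hands PyTorch tensors to scikit-learn's `cosine_similarity`, which expects NumPy-style arrays; it also re-imports `cosine_similarity` inside the function even though it is already imported at the top. One way to stay in PyTorch end to end is `sentence_transformers.util.cos_sim`; the sketch below passes the corpus in explicitly, which is an adaptation rather than the committed signature:

from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('all-MiniLM-L6-v2')

def respond_to_query(query, pdf_sentences, pdf_embeddings):
    # pdf_embeddings is assumed to come from model.encode(pdf_sentences, convert_to_tensor=True)
    query_embedding = model.encode(query, convert_to_tensor=True)
    similarities = util.cos_sim(query_embedding, pdf_embeddings)  # tensor of shape (1, len(pdf_sentences))
    best_match_index = int(similarities.argmax())
    return pdf_sentences[best_match_index]

Equivalently, encoding with `convert_to_tensor=False` yields NumPy arrays that the existing scikit-learn call accepts (the query embedding still needs the `.reshape(1, -1)`).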
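
The last added line, `!streamlit run app.py & npx localtunnel --port 8501`, is IPython/Colab cell syntax, not valid Python, so running app.py directly fails with a SyntaxError; it belongs in a separate notebook cell or shell rather than in the file. Likewise, `os.system("mpg321 response.mp3")` plays the audio on the machine running the script, not in the visitor's browser, whereas the removed version's `st.audio` handles in-browser playback. A small sketch of that approach, with a hypothetical helper name:

import streamlit as st
from gtts import gTTS

def speak_in_browser(response_text, path="response.mp3"):
    # Hypothetical helper: synthesize speech with gTTS and let Streamlit render an HTML5 player,
    # so playback happens client-side instead of via mpg321 on the server.
    tts = gTTS(response_text)
    tts.save(path)
    with open(path, "rb") as audio_file:
        st.audio(audio_file.read(), format="audio/mp3")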