Khd-B committed on
Commit
2a5fb30
·
verified ·
1 Parent(s): 060ef6c

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -0
app.py ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import PyPDF2
3
+ from transformers import AutoTokenizer, AutoModel
4
+ import torch
5
+ import numpy as np
6
+ import faiss
7
+ from gtts import gTTS
8
+ import os
9
+
10
+ # Initialize the model and tokenizer
11
model_name = "sentence-transformers/all-MiniLM-L6-v2"


@st.cache_resource
def _load_encoder(name):
    """Load the tokenizer and encoder for ``name`` once and reuse them.

    Streamlit re-executes this script from the top on every widget
    interaction; caching the loaded objects avoids reloading the model on
    each rerun.

    Args:
        name: Model identifier passed to ``from_pretrained``.

    Returns:
        A ``(tokenizer, model)`` tuple built with ``AutoTokenizer`` and
        ``AutoModel``.
    """
    return AutoTokenizer.from_pretrained(name), AutoModel.from_pretrained(name)


# Same module-level names as before, so the rest of the script is unchanged.
tokenizer, model = _load_encoder(model_name)
14
+
15
+ # Function to get embeddings
16
def get_embedding(text, max_length=512):
    """Embed ``text`` as the mean of the encoder's last-layer token states.

    Args:
        text: The string to embed.
        max_length: Maximum number of tokens fed to the encoder; longer
            input is truncated instead of failing inside the model. The
            default of 512 is assumed to match the encoder's position limit
            (confirm against the model configuration).

    Returns:
        A NumPy array holding ``last_hidden_state`` averaged over the token
        dimension (one row per input sequence).
    """
    inputs = tokenizer(
        text,
        return_tensors='pt',
        truncation=True,
        max_length=max_length,
    )
    # Inference only: skip gradient tracking so .numpy() can be called directly.
    with torch.no_grad():
        token_states = model(**inputs).last_hidden_state
    return token_states.mean(dim=1).numpy()
21
+
22
+ # Initialize FAISS index
23
embeddings_dimension = 384  # for MiniLM
index = faiss.IndexFlatL2(embeddings_dimension)

# Title of the app
st.title("Study Assistant for Grade 9")

# File uploader widget
uploaded_file = st.file_uploader("Upload your PDF file", type=["pdf"])

if uploaded_file is not None:
    # Read the uploaded PDF file and concatenate the text of every page.
    pdf_reader = PyPDF2.PdfReader(uploaded_file)
    # A page may yield no text; extract once per page and fall back to "".
    text = "".join(page.extract_text() or "" for page in pdf_reader.pages)

    st.subheader("Extracted Text:")
    st.write(text)

    if text.strip():
        # Generate an embedding for the extracted text and index it.
        embeddings = get_embedding(text)
        index.add(embeddings)
        st.success("Text extracted and embeddings generated!")
    else:
        # Nothing to embed (e.g. a scanned, image-only PDF).
        st.warning("No text could be extracted from this PDF.")

# Subject selection and query input
subject = st.selectbox("Select Subject", ["Math", "Science", "English"])
query = st.text_input("Type your query")

if st.button("Submit"):
    if not query:
        st.warning("Please type a query first.")
    elif index.ntotal == 0:
        # No document has been indexed in this run, so there is nothing to search.
        st.warning("Upload a PDF with extractable text before searching.")
    else:
        # Get embedding for the query
        query_embedding = get_embedding(query)

        # Never ask for more neighbours than the index holds.
        top_k = min(5, index.ntotal)
        D, I = index.search(query_embedding, k=top_k)

        st.subheader("Top Matches:")
        # D and I are aligned by result position, so pair them up instead of
        # indexing the distance row with the returned vector id.
        for distance, idx in zip(D[0], I[0]):
            if idx < 0:  # negative ids mark "no result" slots
                continue
            st.write(f"Match Index: {idx}, Distance: {distance}")

        # Convert response to speech
        response_text = f"You asked about '{query}' in {subject}. Here are your top matches."
        tts = gTTS(text=response_text, lang='en')
        tts.save("response.mp3")
        # Play the clip in the user's browser rather than shelling out to an
        # OS-specific command on the machine running the server.
        with open("response.mp3", "rb") as audio_file:
            st.audio(audio_file.read(), format="audio/mpeg")

        st.success("Response generated and spoken!")
74
+
75
+ # Note: To handle errors or improve this further, add appropriate try-except blocks and validations.