Spaces:
Sleeping
Sleeping
File size: 1,929 Bytes
c7a62b9 2a5fb30 c7a62b9 7e83395 c7a62b9 7e83395 c7a62b9 2a5fb30 7e83395 2a5fb30 7e83395 c7a62b9 7e83395 2a5fb30 c7a62b9 7e83395 c7a62b9 2a5fb30 c7a62b9 2a5fb30 c7a62b9 2a5fb30 c7a62b9 2a5fb30 c7a62b9 7e83395 c7a62b9 2a5fb30 7e83395 c7a62b9 7e83395 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
import io
import os

import pdfplumber
import streamlit as st
from gtts import gTTS
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
# Function to extract text from a limited number of pages in a PDF
def extract_text_from_pdf(pdf_path, start_page=0, end_page=10):
    """Return the text of pages ``[start_page, end_page)`` of a PDF.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.
        start_page: Zero-based index of the first page to read.
        end_page: Zero-based index one past the last page to read.

    Returns:
        The extracted text of every selected page, each followed by a
        newline. The page list is sliced, so a window that lies beyond
        the end of the document yields an empty string.
    """
    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() may give None for a page without extractable text
        # (e.g. a scanned image); treat that as an empty page instead of
        # failing on `None + "\n"`.
        page_texts = [
            page.extract_text() or ""
            for page in pdf.pages[start_page:end_page]
        ]
    return "".join(page_text + "\n" for page_text in page_texts)
# Load your PDF file (you might want to upload it separately in Spaces)
pdf_path = "/content/Accounting.pdf"  # Update this with the actual file path in Spaces

# Page window scanned below, read in fixed-size batches.
TOTAL_PAGES = 300
PAGES_PER_BATCH = 10

# Initialize the model
model = SentenceTransformer('all-MiniLM-L6-v2')

# Process the first TOTAL_PAGES pages, PAGES_PER_BATCH pages at a time
all_sentences = []
for i in range(0, TOTAL_PAGES, PAGES_PER_BATCH):
    pdf_text = extract_text_from_pdf(
        pdf_path, start_page=i, end_page=i + PAGES_PER_BATCH
    )
    # Splitting on '. ' leaves empty or whitespace-only fragments for blank
    # batches; drop them so they are never embedded or returned as an answer.
    all_sentences.extend(
        fragment.strip() for fragment in pdf_text.split('. ') if fragment.strip()
    )

if not all_sentences:
    # Fail early with a clear message rather than later inside the encoder
    # or the similarity search.
    raise RuntimeError(f"No text could be extracted from {pdf_path!r}")

# Create embeddings from extracted sentences; row k of pdf_embeddings
# corresponds to all_sentences[k].
pdf_embeddings = model.encode(all_sentences, convert_to_tensor=True)
# Function to respond to user query
def respond_to_query(query):
    """Return the extracted sentence that is most similar to ``query``.

    The query is embedded with the module-level ``model`` and compared by
    cosine similarity against ``pdf_embeddings``; the entry of
    ``all_sentences`` at the highest-scoring index is returned.

    Args:
        query: The user's question text.

    Returns:
        The best-matching entry of ``all_sentences``.
    """
    query_embedding = model.encode(query, convert_to_tensor=True)
    # scikit-learn operates on host-memory NumPy arrays, while both embeddings
    # are torch tensors that may sit on an accelerator if one was selected
    # for the model. Copy them to CPU NumPy arrays before comparing.
    query_matrix = query_embedding.detach().cpu().numpy().reshape(1, -1)
    corpus_matrix = pdf_embeddings.detach().cpu().numpy()
    similarities = cosine_similarity(query_matrix, corpus_matrix)
    # similarities has a single row, so the flat argmax is the sentence index.
    best_match_index = int(similarities.argmax())
    return all_sentences[best_match_index]
# Streamlit app
st.title("Study Assistant")
query = st.text_input("Type your question:")
submit_button = st.button("Ask")

if submit_button:
    # A whitespace-only entry is treated the same as an empty one.
    if query.strip():
        response = respond_to_query(query)
        # Show the text answer first so it stays visible even if speech
        # synthesis fails.
        st.write(response)
        if response.strip():
            # Text-to-Speech: synthesise into memory and let the browser play
            # it. Launching a command-line player would play on the server,
            # and a fixed "response.mp3" file would be shared by all sessions.
            audio_buffer = io.BytesIO()
            gTTS(response).write_to_fp(audio_buffer)
            st.audio(audio_buffer.getvalue(), format="audio/mp3")
    else:
        st.write("Please enter a question.")
|