Spaces:

Khd-B
/

Study_Assistant

Sleeping

App Files Files Community

Study_Assistant / app.py

Khd-B

Update app.py

c3ca56d verified 5 days ago

raw

history blame

1.98 kB

	import pdfplumber
	from sentence_transformers import SentenceTransformer
	import streamlit as st
	from gtts import gTTS
	import os
	from sklearn.metrics.pairwise import cosine_similarity

	# Function to extract text from a limited number of pages in a PDF
	def extract_text_from_pdf(pdf_path, start_page=0, end_page=10):
	    """Return the text of a slice of pages from a PDF.

	    Args:
	        pdf_path: Passed straight to ``pdfplumber.open``.
	        start_page: Zero-based index of the first page to read.
	        end_page: Zero-based index one past the last page to read. The
	            range is applied as a list slice, so running past the end of
	            the document simply yields fewer pages.

	    Returns:
	        The text of each selected page, each followed by a newline, joined
	        into one string. Empty when the slice selects no pages.
	    """
	    with pdfplumber.open(pdf_path) as pdf:
	        # extract_text() may return None for a page without extractable
	        # characters (e.g. a scanned image); fall back to "" so that the
	        # concatenation below cannot raise TypeError.
	        page_texts = [
	            page.extract_text() or ""
	            for page in pdf.pages[start_page:end_page]
	        ]
	    return "".join(page_text + "\n" for page_text in page_texts)

	# pdfplumber.open() needs a local path or a binary file object; it cannot
	# fetch an http(s) URL. The PDF is read from the directory that holds this
	# script.
	# NOTE(review): assumes Accounting.pdf is committed at the Space's repo root,
	# as the previously used ".../raw/main/Accounting.pdf" URL implies - confirm.
	pdf_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "Accounting.pdf")

	# Upper bound on the number of pages indexed. Pages are selected with a list
	# slice, so a shorter PDF is simply read to its end.
	MAX_PAGES = 300


	@st.cache_resource
	def _build_index(path, max_pages=MAX_PAGES):
	    """Load the embedding model and embed the PDF's sentences once.

	    Streamlit re-executes the whole script on every widget interaction;
	    caching keeps the model load, PDF parse and embedding pass from being
	    repeated on each rerun.

	    Args:
	        path: Location of the PDF to index.
	        max_pages: Maximum number of leading pages to read.

	    Returns:
	        A ``(model, sentences, embeddings)`` tuple where ``embeddings`` is
	        the result of encoding ``sentences`` with ``convert_to_tensor=True``.
	    """
	    encoder = SentenceTransformer('all-MiniLM-L6-v2')
	    # A single call opens the PDF once instead of once per 10-page batch.
	    pdf_text = extract_text_from_pdf(path, start_page=0, end_page=max_pages)
	    # Drop empty / whitespace-only fragments: they carry no content, could
	    # be picked as the "best match", and gTTS refuses to speak empty text.
	    sentences = [
	        fragment.strip()
	        for fragment in pdf_text.split('. ')
	        if fragment.strip()
	    ]
	    embeddings = encoder.encode(sentences, convert_to_tensor=True)
	    return encoder, sentences, embeddings


	# Module-level names used by respond_to_query().
	model, all_sentences, pdf_embeddings = _build_index(pdf_path)

	# Function to respond to user query
	def respond_to_query(query):
	    """Return the indexed sentence most similar to ``query``.

	    Encodes the query with the module-level ``model``, scores it against
	    ``pdf_embeddings`` with cosine similarity and returns the entry of
	    ``all_sentences`` at the highest-scoring position.

	    Args:
	        query: The user's question as a string.

	    Returns:
	        The best-matching sentence from ``all_sentences``.
	    """
	    query_embedding = model.encode(query, convert_to_tensor=True)
	    # scikit-learn only accepts host-side arrays. Move both tensors to the
	    # CPU and convert them explicitly so this also works when the model
	    # runs on an accelerator.
	    query_vector = query_embedding.detach().cpu().numpy().reshape(1, -1)
	    corpus_matrix = pdf_embeddings.detach().cpu().numpy()
	    similarities = cosine_similarity(query_vector, corpus_matrix)
	    # argmax() returns a NumPy integer; use a plain int for list indexing.
	    best_match_index = int(similarities.argmax())
	    return all_sentences[best_match_index]

	# Streamlit app: ask a question, show the best-matching sentence from the
	# PDF and offer it as speech.
	st.title("Study Assistant")

	query = st.text_input("Type your question:")
	submit_button = st.button("Ask")

	if submit_button:
	    # Treat whitespace-only input the same as no input.
	    if query and query.strip():
	        response = respond_to_query(query)

	        # Show the text first so the answer stays visible even when speech
	        # synthesis fails.
	        st.write(response)

	        # gTTS rejects empty text, so only synthesize a non-blank answer.
	        if response.strip():
	            try:
	                tts = gTTS(response)
	                tts.save("response.mp3")
	                # Send the audio to the user's browser. Shelling out to
	                # mpg321 would play it on the server (if installed at all),
	                # where the user cannot hear it.
	                with open("response.mp3", "rb") as audio_file:
	                    st.audio(audio_file.read(), format="audio/mp3")
	            except Exception as exc:
	                # Audio is best-effort (gTTS needs network access); this is
	                # the UI boundary, so report the problem instead of crashing.
	                st.warning(f"Could not generate audio: {exc}")
	    else:
	        st.write("Please enter a question.")