Spaces:

Khd-B
/

Study_Assistant

Sleeping

App Files Community

Khd-B committed 5 days ago

Commit

51c0f70

•

1 Parent(s): 00da70a

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -11

app.py CHANGED Viewed

@@ -5,31 +5,40 @@ from gtts import gTTS
 import os
 from sklearn.metrics.pairwise import cosine_similarity
-# Load the PDF and extract text once
 @st.cache_resource
-def load_pdf_and_extract_text(pdf_path):
     all_sentences = []
     with pdfplumber.open(pdf_path) as pdf:
-        for page in pdf.pages:
             text = page.extract_text()
             if text:
                 all_sentences.extend(text.split('. '))
     return all_sentences
-# Load embeddings for the extracted text
 @st.cache_resource
 def create_embeddings(sentences):
-    model = SentenceTransformer('all-MiniLM-L6-v2')
     return model.encode(sentences, convert_to_tensor=True)
-# Load your PDF file and create embeddings
-pdf_path = "Accounting.pdf"  # Ensure this is uploaded to your space
-all_sentences = load_pdf_and_extract_text(pdf_path)
 pdf_embeddings = create_embeddings(all_sentences)
 # Function to respond to user query
 def respond_to_query(query):
-    query_embedding = SentenceTransformer('all-MiniLM-L6-v2').encode(query, convert_to_tensor=True)
     similarities = cosine_similarity(query_embedding.reshape(1, -1), pdf_embeddings)
     best_match_index = similarities.argmax()
     response = all_sentences[best_match_index]
@@ -44,11 +53,11 @@ submit_button = st.button("Ask")
 if submit_button:
     if query:
         response = respond_to_query(query)
         # Text-to-Speech
         tts = gTTS(response)
         tts.save("response.mp3")
         # (Optional) Playing audio might not work in Spaces, consider alternatives
         st.write(response)
     else:

 import os
 from sklearn.metrics.pairwise import cosine_similarity
+# Function to extract text from a limited number of pages in a PDF
 @st.cache_resource
+def load_pdf_and_extract_text(pdf_path, max_pages=20):
     all_sentences = []
     with pdfplumber.open(pdf_path) as pdf:
+        total_pages = len(pdf.pages)
+        st.write(f"Total pages to process: {total_pages}")
+        for i, page in enumerate(pdf.pages):
+            if i >= max_pages:
+                break
+            st.write(f"Processing page {i + 1}...")
             text = page.extract_text()
             if text:
                 all_sentences.extend(text.split('. '))
+            st.progress((i + 1) / max_pages)  # Update progress
     return all_sentences
+# Load your PDF file
+pdf_path = "Accounting.pdf"  # Ensure this is uploaded to your space
+all_sentences = load_pdf_and_extract_text(pdf_path)
+# Initialize the model
+model = SentenceTransformer('all-MiniLM-L6-v2')
+# Create embeddings from extracted sentences
 @st.cache_resource
 def create_embeddings(sentences):
     return model.encode(sentences, convert_to_tensor=True)
 pdf_embeddings = create_embeddings(all_sentences)
 # Function to respond to user query
 def respond_to_query(query):
+    query_embedding = model.encode(query, convert_to_tensor=True)
     similarities = cosine_similarity(query_embedding.reshape(1, -1), pdf_embeddings)
     best_match_index = similarities.argmax()
     response = all_sentences[best_match_index]
 if submit_button:
     if query:
         response = respond_to_query(query)
         # Text-to-Speech
         tts = gTTS(response)
         tts.save("response.mp3")
         # (Optional) Playing audio might not work in Spaces, consider alternatives
         st.write(response)
     else: