Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -5,32 +5,31 @@ from gtts import gTTS
|
|
5 |
import os
|
6 |
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
-
#
|
9 |
-
|
10 |
-
|
|
|
11 |
with pdfplumber.open(pdf_path) as pdf:
|
12 |
-
for page in pdf.pages
|
13 |
-
text
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
# Create embeddings from extracted sentences
|
29 |
-
pdf_embeddings = model.encode(all_sentences, convert_to_tensor=True)
|
30 |
|
31 |
# Function to respond to user query
|
32 |
def respond_to_query(query):
|
33 |
-
query_embedding =
|
34 |
similarities = cosine_similarity(query_embedding.reshape(1, -1), pdf_embeddings)
|
35 |
best_match_index = similarities.argmax()
|
36 |
response = all_sentences[best_match_index]
|
@@ -45,14 +44,12 @@ submit_button = st.button("Ask")
|
|
45 |
if submit_button:
|
46 |
if query:
|
47 |
response = respond_to_query(query)
|
48 |
-
|
49 |
# Text-to-Speech
|
50 |
tts = gTTS(response)
|
51 |
tts.save("response.mp3")
|
52 |
-
|
53 |
-
# Playing audio
|
54 |
-
os.system("mpg321 response.mp3")
|
55 |
-
|
56 |
st.write(response)
|
57 |
else:
|
58 |
st.write("Please enter a question.")
|
|
|
5 |
import os
|
6 |
from sklearn.metrics.pairwise import cosine_similarity
|
7 |
|
8 |
+
# Load the PDF and extract its text (result cached via st.cache_resource)
@st.cache_resource
def load_pdf_and_extract_text(pdf_path):
    """Extract the text of a PDF and split it into sentence-like fragments.

    Args:
        pdf_path: Path of the PDF file, passed to ``pdfplumber.open``.

    Returns:
        A list of non-empty, whitespace-normalized strings in page order.
        The list is empty when no page yields any text.
    """
    all_sentences = []
    with pdfplumber.open(pdf_path) as pdf:
        for page in pdf.pages:
            text = page.extract_text()
            if not text:
                # Nothing was extracted for this page; skip it.
                continue
            # Collapse line breaks and runs of spaces into single spaces so
            # that a sentence ending at a line end is still followed by
            # ". " and gets split like any other sentence.
            normalized = " ".join(text.split())
            # Drop empty fragments: they carry no content and would
            # otherwise be embedded and offered as possible answers.
            all_sentences.extend(
                fragment.strip()
                for fragment in normalized.split('. ')
                if fragment.strip()
            )
    return all_sentences
|
18 |
+
|
19 |
+
# Compute embeddings for the extracted text (result cached via st.cache_resource)
@st.cache_resource
def create_embeddings(sentences):
    """Encode ``sentences`` with the 'all-MiniLM-L6-v2' SentenceTransformer.

    Args:
        sentences: The strings to embed.

    Returns:
        The value produced by ``encode(sentences, convert_to_tensor=True)``.
    """
    encoder = SentenceTransformer('all-MiniLM-L6-v2')
    embeddings = encoder.encode(sentences, convert_to_tensor=True)
    return embeddings
|
24 |
+
|
25 |
+
# Load your PDF file and create embeddings
pdf_path = "Accounting.pdf"  # Ensure this is uploaded to your space
if not os.path.exists(pdf_path):
    # Show a readable message instead of a traceback when the file is absent.
    st.error(f"PDF file '{pdf_path}' was not found. Upload it and reload the app.")
    st.stop()

all_sentences = load_pdf_and_extract_text(pdf_path)
if not all_sentences:
    # Without any sentences there is nothing to embed or to answer from.
    st.error(f"No text could be extracted from '{pdf_path}'.")
    st.stop()

pdf_embeddings = create_embeddings(all_sentences)
|
|
|
|
|
29 |
|
30 |
# Function to respond to user query
|
31 |
def respond_to_query(query):
|
32 |
+
query_embedding = SentenceTransformer('all-MiniLM-L6-v2').encode(query, convert_to_tensor=True)
|
33 |
similarities = cosine_similarity(query_embedding.reshape(1, -1), pdf_embeddings)
|
34 |
best_match_index = similarities.argmax()
|
35 |
response = all_sentences[best_match_index]
|
|
|
44 |
# Handle a click on the "Ask" button: answer the query as speech and text.
if submit_button:
    if query:
        response = respond_to_query(query)

        # Text-to-Speech
        tts = gTTS(response)
        tts.save("response.mp3")

        # Play the saved audio through Streamlit's audio widget rather than
        # a command-line player, which might not work in Spaces.
        st.audio("response.mp3", format="audio/mpeg")

        st.write(response)
    else:
        st.write("Please enter a question.")
|