Update app.py
Browse files
app.py
CHANGED
@@ -4,17 +4,15 @@ from transformers import pipeline
|
|
4 |
import gradio as gr
|
5 |
|
6 |
def extract_text_from_pptx(file_path):
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
print(f"Error extracting text from PowerPoint: {e}")
|
17 |
-
raise # Re-raise the exception for further investigation
|
18 |
|
19 |
|
20 |
def predict_pptx_content(file_path):
|
@@ -22,7 +20,7 @@ def predict_pptx_content(file_path):
|
|
22 |
try:
|
23 |
extracted_text = extract_text_from_pptx(file_path)
|
24 |
print(f"Extracted text: {extracted_text}")
|
25 |
-
|
26 |
cleaned_text = re.sub(r'\s+', ' ', extracted_text)
|
27 |
print(f"Cleaned text: {cleaned_text}")
|
28 |
|
|
|
4 |
import gradio as gr
|
5 |
|
6 |
def extract_text_from_pptx(file_path):
|
7 |
+
presentation = Presentation(file_path)
|
8 |
+
|
9 |
+
text = []
|
10 |
+
for slide_number, slide in enumerate(presentation.slides, start=1):
|
11 |
+
for shape in slide.shapes:
|
12 |
+
if hasattr(shape, "text"):
|
13 |
+
text.append(shape.text)
|
14 |
+
|
15 |
+
return "\n".join(text)
|
|
|
|
|
16 |
|
17 |
|
18 |
def predict_pptx_content(file_path):
|
|
|
20 |
try:
|
21 |
extracted_text = extract_text_from_pptx(file_path)
|
22 |
print(f"Extracted text: {extracted_text}")
|
23 |
+
|
24 |
cleaned_text = re.sub(r'\s+', ' ', extracted_text)
|
25 |
print(f"Cleaned text: {cleaned_text}")
|
26 |
|