Spaces:

Ahmed235
/

final

Sleeping

Ahmed235 committed on Mar 4, 2024

Commit

c4d5545

verified ·

1 Parent(s): 0172e31

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -2,6 +2,8 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import torch.nn.functional as F
 # Load the pre-trained model and tokenizer using gr.load
 model = gr.load("models/Ahmed235/roberta_classification")
@@ -13,8 +15,13 @@ device = torch.device("cpu")
 model = model.to(device)  # Move the model to the CPU
 def extract_text_from_pptx(file_path):
-    # Assume your implementation for text extraction remains the same
-    pass
 def predict_pptx_content(file_path):
     try:

 from transformers import AutoTokenizer, AutoModelForSequenceClassification
 import torch
 import torch.nn.functional as F
+from pptx import Presentation
+import re
 # Load the pre-trained model and tokenizer using gr.load
 model = gr.load("models/Ahmed235/roberta_classification")
 model = model.to(device)  # Move the model to the CPU
 def extract_text_from_pptx(file_path):
     """Return the text of every text-bearing shape in a presentation.

     Slides are visited in the order ``presentation.slides`` yields them,
     and shapes in the order ``slide.shapes`` yields them. Shapes that
     expose no ``text`` attribute are skipped.

     Args:
         file_path: Passed unchanged to ``Presentation``.

     Returns:
         The collected shape texts joined with newlines; an empty string
         when no shape contributes text.
     """
     presentation = Presentation(file_path)
     texts = []
     # No slide numbering is needed, so iterate the slides directly.
     for slide in presentation.slides:
         texts.extend(
             shape.text for shape in slide.shapes if hasattr(shape, "text")
         )
     return "\n".join(texts)
 def predict_pptx_content(file_path):
     try: