Ahmed235 committed on
Commit
c4d5545
·
verified ·
1 Parent(s): 0172e31

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -2,6 +2,8 @@ import gradio as gr
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  import torch.nn.functional as F
 
 
5
 
6
  # Load the pre-trained model and tokenizer using gr.load
7
  model = gr.load("models/Ahmed235/roberta_classification")
@@ -13,8 +15,13 @@ device = torch.device("cpu")
13
  model = model.to(device) # Move the model to the CPU
14
 
15
  def extract_text_from_pptx(file_path):
16
- # Assume your implementation for text extraction remains the same
17
- pass
 
 
 
 
 
18
 
19
  def predict_pptx_content(file_path):
20
  try:
 
2
  from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  import torch
4
  import torch.nn.functional as F
5
+ from pptx import Presentation
6
+ import re
7
 
8
  # Load the pre-trained model and tokenizer using gr.load
9
  model = gr.load("models/Ahmed235/roberta_classification")
 
15
  model = model.to(device) # Move the model to the CPU
16
 
17
  def extract_text_from_pptx(file_path):
18
+ presentation = Presentation(file_path)
19
+ text = []
20
+ for slide_number, slide in enumerate(presentation.slides, start=1):
21
+ for shape in slide.shapes:
22
+ if hasattr(shape, "text"):
23
+ text.append(shape.text)
24
+ return "\n".join(text)
25
 
26
  def predict_pptx_content(file_path):
27
  try: