Ahmed235 committed
Commit 18332e8 · verified · 1 Parent(s): 1a023c5

Update app.py

Files changed (1)
  1. app.py +7 -24
app.py CHANGED
@@ -1,15 +1,8 @@
 import gradio as gr
-from transformers import AutoTokenizer, AutoModelForSequenceClassification
-import torch
-import torch.nn.functional as F
-from pptx import Presentation
-import re
+from transformers import pipeline

-# Load the pre-trained model and tokenizer using gr.load
-model = gr.load("models/Ahmed235/roberta_classification")
-
-# Tokenizer can be loaded using transformers directly
-tokenizer = AutoTokenizer.from_pretrained("Ahmed235/roberta_classification")
+# Create a text classification pipeline
+classifier = pipeline("text-classification", model="Ahmed235/roberta_classification", tokenizer="Ahmed235/roberta_classification")

 def extract_text_from_pptx(file_path):
     presentation = Presentation(file_path)
@@ -23,22 +16,12 @@ def extract_text_from_pptx(file_path):
 def predict_pptx_content(file_path):
     try:
         extracted_text = extract_text_from_pptx(file_path)
-        cleaned_text = re.sub(r'\s+', ' ', extracted_text)
-
-        # Tokenize and encode the cleaned text
-        input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")
-        input_encoding = {key: val.to(device) for key, val in input_encoding.items()}  # Move input tensor to CPU
-
-        # Perform inference
-        with torch.no_grad():
-            outputs = model(**input_encoding)
-            logits = outputs.logits

-        probabilities = F.softmax(logits, dim=1)
+        # Perform inference using the pipeline
+        result = classifier(extracted_text)

-        predicted_label_id = torch.argmax(logits, dim=1).item()
-        predicted_label = model.config.id2label[predicted_label_id]
-        predicted_probability = probabilities[0][predicted_label_id].item()
+        predicted_label = result[0]['label']
+        predicted_probability = result[0]['score']

         prediction = {
             "Predicted Label": predicted_label,