Spaces:

Ahmed235
/

final

Sleeping

File size: 2,400 Bytes

44e21a6
 
98c0f54
 
 
 
f4067be
98c0f54
 
 
 
 
44e21a6
2bb61b8
06bc437
 
 
 
 
 
 
2bb61b8
12b0ed7
95d05cb
8cb1867
 
1aa90a2
98c0f54
 
 
 
 
 
 
 
 
 
 
 
 
 
1aa90a2
95d05cb
98c0f54
 
95d05cb
367a8a1
95d05cb
 
 
 
 
 
12b0ed7
 
 
 
7044543
98c0f54
f4067be
bcb2ab6
12b0ed7
 
 
98c0f54

from pptx import Presentation
import re
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import torch.nn.functional as F

# Inference runs on the CPU; the sequence-classification checkpoint and its
# matching tokenizer are loaded once at import time and shared by every request.
device = torch.device("cpu")
tokenizer = AutoTokenizer.from_pretrained("Ahmed235/roberta_classification")
model = AutoModelForSequenceClassification.from_pretrained(
    "Ahmed235/roberta_classification"
).to(device)  # place the weights on the chosen device

def extract_text_from_pptx(file_path):
    """Collect the text of every text-bearing shape in a presentation.

    Args:
        file_path: Location of the presentation; passed unchanged to
            ``Presentation``.

    Returns:
        The ``text`` of each shape that exposes one, in slide order and then
        shape order, joined with newlines. An empty string when no shape
        exposes text.
    """
    presentation = Presentation(file_path)
    texts = []
    for slide in presentation.slides:
        # Shapes without a ``text`` attribute are skipped.
        texts.extend(
            shape.text for shape in slide.shapes if hasattr(shape, "text")
        )
    return "\n".join(texts)

def predict_pptx_content(file_path):
    """Classify the text of a PowerPoint file with the module-level model.

    Args:
        file_path: Path to the uploaded .pptx file. ``None`` or an empty
            string means nothing was uploaded.

    Returns:
        On success, a dict with the keys ``"Predicted Label"`` (looked up in
        ``model.config.id2label``) and ``"Evaluation"`` (a sentence that
        embeds the softmax probability of that label).
        On failure, a dict with the single key ``"error"`` holding a message.
    """
    # Guard before touching python-pptx: per its documentation,
    # ``Presentation(None)`` loads the library's default template, which would
    # yield a prediction for a deck the user never supplied.
    if not file_path:
        return {"error": "No file was uploaded."}

    try:
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse every run of whitespace (including newlines) to one space.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)

        # A deck without any text gives the classifier nothing to work with.
        if not cleaned_text.strip():
            return {"error": "No text found in the presentation."}

        # Tokenize and encode the cleaned text; over-long input is truncated.
        input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")
        input_encoding = {key: val.to(device) for key, val in input_encoding.items()}

        # Perform inference without tracking gradients.
        with torch.no_grad():
            outputs = model(**input_encoding)
            logits = outputs.logits

        probabilities = F.softmax(logits, dim=1)

        # A single text is encoded, so argmax over dim 1 holds one element.
        predicted_label_id = torch.argmax(logits, dim=1).item()
        predicted_label = model.config.id2label[predicted_label_id]
        predicted_probability = probabilities[0][predicted_label_id].item()

        return {
            "Predicted Label": predicted_label,
            "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
        }

    except Exception as e:
        # Request boundary: report the failure to the caller instead of
        # letting it propagate, and log the details.
        print(f"Error in predict_pptx_content: {e}")
        return {"error": str(e)}

def _predict_for_ui(file_path):
    """Adapt ``predict_pptx_content`` to the interface's two text outputs.

    ``predict_pptx_content`` returns a single dict, whereas the interface
    declares two output components and therefore needs two return values.

    Args:
        file_path: Path of the uploaded file as supplied by the ``gr.File``
            input.

    Returns:
        A ``(predicted_label, evaluation)`` tuple, one value per output box.

    Raises:
        gr.Error: If the prediction dict carries an ``"error"`` entry, so the
            message is shown in the UI instead of the output boxes.
    """
    result = predict_pptx_content(file_path)
    if "error" in result:
        raise gr.Error(result["error"])
    return result["Predicted Label"], result["Evaluation"]


# Define the Gradio interface
iface = gr.Interface(
    fn=_predict_for_ui,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    outputs=[
        gr.Textbox(label="Predicted Label"),
        gr.Textbox(label="Evaluation"),
    ],
    live=False,  # Run only on submit; True would re-run on every input change
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
)

# Deploy the Gradio interface
iface.launch(share=True)