Spaces:

Ahmed235
/

final

Sleeping

File size: 2,158 Bytes

98c0f54
18332e8
d6ff263
bbeaa3a
09ca2da
18332e8
 
ecd8fcb
09ca2da
2bb61b8
c4d5545
 
 
 
 
 
 
2bb61b8
d6ff263
 
 
 
12b0ed7
95d05cb
8cb1867
b246175
d6ff263
 
 
 
18332e8
d6ff263
98c0f54
18332e8
 
d6ff263
d077b39
 
8d2b38e
 
d077b39
367a8a1
95d05cb
 
 
 
 
 
12b0ed7
 
 
 
fcf7672
8d2b38e
d6ff263
bcb2ab6
12b0ed7
 
 
2292b28

import gradio as gr
from transformers import pipeline
from pptx import Presentation
import re

# Model identifier shared by the classifier's weights and its tokenizer
_CLASSIFIER_MODEL_ID = "Ahmed235/roberta_classification"

# Text-classification pipeline, built once when the module is loaded
classifier = pipeline(
    "text-classification",
    model=_CLASSIFIER_MODEL_ID,
    tokenizer=_CLASSIFIER_MODEL_ID,
)

# Summarization pipeline, likewise built once when the module is loaded
summarizer = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
)

def extract_text_from_pptx(file_path):
    """Collect the text of every text-bearing shape in a PowerPoint file.

    Slides are visited in the order ``presentation.slides`` yields them and,
    within each slide, shapes in the order ``slide.shapes`` yields them.
    Shapes that expose no ``text`` attribute are skipped.

    Args:
        file_path: Value handed to ``Presentation`` to open the deck.

    Returns:
        The collected shape texts joined with newline characters; an empty
        string when no shape carries text.
    """
    deck = Presentation(file_path)
    shape_texts = [
        shape.text
        for slide in deck.slides
        for shape in slide.shapes
        if hasattr(shape, "text")
    ]
    return "\n".join(shape_texts)

def limit_text_length(text, max_length=512):
    """Return the leading ``max_length`` characters of ``text``.

    The cut is made by plain slicing, so a text shorter than ``max_length``
    is returned unchanged.

    Args:
        text: The string to shorten.
        max_length: Slice end index applied to ``text`` (default 512).

    Returns:
        ``text`` sliced from its start up to ``max_length``.
    """
    leading_window = slice(max_length)
    return text[leading_window]

def predict_pptx_content(file_path):
    """Classify and summarize the text of an uploaded PowerPoint file.

    The deck's text is extracted, whitespace runs are collapsed to single
    spaces, and a length-limited copy is passed to the module-level
    ``classifier``. The raw extracted text is passed to the module-level
    ``summarizer``.

    Args:
        file_path: Path of the ``.pptx`` file to analyze.

    Returns:
        A tuple of three strings ``(label, evaluation, summary)`` -- one value
        per output component declared on the Gradio interface. When the deck
        contains no text, or when any step raises, the first string carries
        the explanatory message and the other two are empty.
    """
    try:
        extracted_text = extract_text_from_pptx(file_path)
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)

        # Nothing to classify or summarize (e.g. an image-only deck): say so
        # explicitly instead of running the models on an empty string.
        if not cleaned_text.strip():
            return "No text could be extracted from the presentation.", "", ""

        # Limit text length before classification
        limited_text = limit_text_length(cleaned_text)

        # Perform inference using the pipeline
        result = classifier(limited_text)

        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']
        summary = summarizer(extracted_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']

        # Return one string per output component. The previous dict literal
        # reused the key "text" three times, so only the summary survived.
        return (
            f"Predicted Label: {predicted_label}",
            f"Evaluation: Evaluate the topic according to {predicted_label} is: {predicted_probability}",
            f"Summary: {summary}",
        )

    except Exception as e:
        # Top-level UI boundary: log the error and surface it in the first
        # output box, keeping the three-value shape the interface expects.
        print(f"Error in predict_pptx_content: {e}")
        return f"Error: {e}", "", ""

# Input component: a file upload configured with type="filepath"
pptx_upload = gr.File(type="filepath", label="Upload PowerPoint (.pptx) file")

# Three "text" output components
text_outputs = ["text"] * 3

# Build the Gradio interface around the prediction function
iface = gr.Interface(
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
    fn=predict_pptx_content,
    inputs=pptx_upload,
    outputs=text_outputs,
    live=False,
)

# Start serving the interface
iface.launch(share=True)