File size: 1,998 Bytes
98c0f54
18332e8
2292b28
bbeaa3a
09ca2da
18332e8
 
ecd8fcb
09ca2da
2bb61b8
c4d5545
 
 
 
 
 
 
2bb61b8
12b0ed7
95d05cb
8cb1867
b246175
18332e8
 
98c0f54
18332e8
 
b246175
95d05cb
09ca2da
8b24c55
09ca2da
95d05cb
367a8a1
95d05cb
 
 
 
 
 
12b0ed7
 
 
 
fcf7672
09ca2da
f4067be
bcb2ab6
12b0ed7
 
 
2292b28
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
import gradio as gr
from transformers import pipeline
from pptx import Presentation  # Import the Presentation class
import re

# Text-classification pipeline: model weights and tokenizer are both loaded
# from the same "Ahmed235/roberta_classification" identifier.
classifier = pipeline(
    "text-classification",
    model="Ahmed235/roberta_classification",
    tokenizer="Ahmed235/roberta_classification",
)
# Summarization pipeline used to produce the short summary of the slide text.
summarizer = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
)

def extract_text_from_pptx(file_path):
    """Collect the text of every text-bearing shape in a presentation.

    Args:
        file_path: Value handed straight to ``Presentation`` to open the deck.

    Returns:
        The ``text`` of each shape that has such an attribute, in slide and
        shape order, joined with newline characters.
    """
    deck = Presentation(file_path)
    chunks = []
    for slide in deck.slides:
        # Only shapes exposing a ``text`` attribute contribute to the output.
        chunks.extend(
            shape.text for shape in slide.shapes if hasattr(shape, "text")
        )
    return "\n".join(chunks)

def predict_pptx_content(file_path):
    """Classify and summarize the text contained in a PowerPoint file.

    Args:
        file_path: Path of the .pptx file; forwarded to
            ``extract_text_from_pptx``.

    Returns:
        On success, a dict with the keys ``"Summary"``, ``"Evaluation"`` and
        ``"Predicted Label"``. On any failure, a dict with the single key
        ``"error"`` holding the exception message.
    """
    try:
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse every whitespace run to a single space so both models see
        # one clean line of text. (Previously this value was computed but the
        # raw text was passed to the models instead.)
        cleaned_text = re.sub(r'\s+', ' ', extracted_text).strip()
        if not cleaned_text:
            # Nothing to analyze; reported through the normal error channel.
            raise ValueError("No text found in the presentation")

        # truncation=True is forwarded to the tokenizer so that a long deck is
        # cut to the model's maximum input length instead of failing.
        result = classifier(cleaned_text, truncation=True)

        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']
        summary = summarizer(
            cleaned_text, max_length=80, min_length=30, do_sample=False
        )[0]['summary_text']
        prediction = {
            "Summary": summary,
            "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
            "Predicted Label": predicted_label,
        }

        return prediction

    except Exception as e:
        # UI boundary: log the failure and hand the message back to the
        # caller rather than letting the exception propagate.
        print(f"Error in predict_pptx_content: {e}")
        return {"error": str(e)}

# Define the Gradio interface
def _predict_for_interface(file_path):
    """Adapt the dict from predict_pptx_content to the three output boxes.

    gr.Interface assigns returned values to its output components by
    position, so the result dict is unpacked into a tuple in the same order
    as the ``outputs`` list below. A result carrying an ``"error"`` key is
    raised as ``gr.Error`` so the message is shown in the UI.
    """
    result = predict_pptx_content(file_path)
    if "error" in result:
        raise gr.Error(result["error"])
    return result["Summary"], result["Evaluation"], result["Predicted Label"]


iface = gr.Interface(
    fn=_predict_for_interface,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    # The first positional argument of gr.Textbox is its initial value, so
    # the captions must be passed as ``label=``.
    outputs=[
        gr.Textbox(label="Summary"),
        gr.Textbox(label="Evaluation"),
        gr.Textbox(label="Predicted Label"),
    ],
    live=False,  # Run only on submit; True would re-run on every input change
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
)

# Deploy the Gradio interface
iface.launch(share=True)