File size: 2,007 Bytes
98c0f54
18332e8
d6ff263
bbeaa3a
09ca2da
18332e8
 
ecd8fcb
09ca2da
2bb61b8
c4d5545
 
 
 
 
 
 
2bb61b8
d6ff263
 
 
 
12b0ed7
95d05cb
8cb1867
b246175
d6ff263
 
 
 
18332e8
d6ff263
98c0f54
18332e8
 
124a463
c1117f1
985d946
2e63d67
 
 
ae2295f
b3c1835
2e63d67
b093982
95d05cb
 
12b0ed7
 
 
 
b093982
d8e3fd6
d6ff263
bcb2ab6
12b0ed7
 
 
124a463
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
import gradio as gr
from transformers import pipeline
from pptx import Presentation
import re

# Hugging Face pipelines, constructed once when the module is loaded.
# The classifier names the same checkpoint for its model and its tokenizer.
classifier = pipeline(
    "text-classification",
    model="Ahmed235/roberta_classification",
    tokenizer="Ahmed235/roberta_classification",
)
# Summarization pipeline used by predict_pptx_content.
summarizer = pipeline(
    "summarization",
    model="Falconsai/text_summarization",
)

def extract_text_from_pptx(file_path):
    """Collect the text of every text-bearing shape in a PowerPoint file.

    Args:
        file_path: The deck to open; passed unchanged to
            ``pptx.Presentation``.

    Returns:
        The ``text`` of each shape that exposes one, in slide order and then
        shape order, joined with newlines. An empty string when no shape
        has text.
    """
    presentation = Presentation(file_path)
    text = []
    for slide in presentation.slides:
        # Not every shape has a ``text`` attribute; skip those that don't.
        text.extend(
            shape.text for shape in slide.shapes if hasattr(shape, "text")
        )
    return "\n".join(text)

def limit_text_length(text, max_length=512):
    """Return the leading slice of *text*, capped at *max_length* characters.

    Args:
        text: The string to shorten.
        max_length: Upper bound of the slice (default 512). Ordinary Python
            slice semantics apply, so a text shorter than the bound is
            returned whole.

    Returns:
        ``text`` cut off at ``max_length``.
    """
    truncated = text[0:max_length]
    return truncated

def predict_pptx_content(file_path):
    """Classify and summarize the text of an uploaded PowerPoint file.

    Args:
        file_path: The uploaded deck, forwarded to
            ``extract_text_from_pptx``.

    Returns:
        On success, a dict with ``predicted_label`` (the classifier's label),
        ``evaluation`` (the classifier's score) and ``summary`` (the
        summarizer's text). On failure, or when the deck contains no text,
        a dict with a single ``error`` key describing the problem; the
        exception is logged with ``print`` and not re-raised.
    """
    try:
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse whitespace runs so both models receive compact text.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)

        # Nothing to analyze: report it instead of running the models on
        # an empty string.
        if not cleaned_text.strip():
            return {"error": "No text found in the presentation."}

        # Cheap character cap before classification. It is not a token
        # limit, so truncation=True is also passed to let the tokenizer
        # enforce the model's real maximum input length.
        limited_text = limit_text_length(cleaned_text)
        result = classifier(limited_text, truncation=True)

        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']

        # Summarize the cleaned text, truncated by the tokenizer so a long
        # deck cannot exceed the summarization model's input limit.
        summary = summarizer(
            cleaned_text,
            max_length=1000,
            min_length=30,
            do_sample=False,
            truncation=True,
        )[0]['summary_text']

        return {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary
        }

    except Exception as e:
        # Boundary handler for the UI callback: log the error and hand the
        # message back to the caller instead of raising.
        print(f"Error in predict_pptx_content: {e}")
        return {"error": str(e)}

# Build the Gradio UI: one file-upload input wired to one text output.
_pptx_upload = gr.File(type="file", label="Upload PowerPoint (.pptx) file")
_result_box = gr.Textbox("output")

iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=_pptx_upload,
    outputs=_result_box,
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
)

# Start serving the interface as soon as the module runs.
iface.launch(share=True)