import gradio as gr
from transformers import pipeline
from pptx import Presentation
import re
import json
# Create the text-classification and summarization pipelines
classifier = pipeline("text-classification", model="Ahmed235/roberta_classification", tokenizer="Ahmed235/roberta_classification")
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
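# Note: RoBERTa-based classifiers typically accept at most ~512 tokens, so the
# extracted slide text is trimmed before classification (see limit_text_length below).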
def extract_text_from_pptx(file_path):
    presentation = Presentation(file_path)
    text = []
    for slide in presentation.slides:
        for shape in slide.shapes:
            if hasattr(shape, "text"):
                text.append(shape.text)
    return "\n".join(text)
def limit_text_length(text, max_length=512):
    # Truncate or limit the text length
    return text[:max_length]
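# A rough sketch of token-aware truncation, if preferred over the character-based
# cut above (assumes the pipeline's tokenizer is exposed as classifier.tokenizer;
# the 512-character slice is only an approximation of the 512-token model limit):
#   ids = classifier.tokenizer(text, truncation=True, max_length=512)["input_ids"]
#   text = classifier.tokenizer.decode(ids, skip_special_tokens=True)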
def predict_pptx_content(file_path):
    try:
        extracted_text = extract_text_from_pptx(file_path)
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)

        # Limit text length before classification
        limited_text = limit_text_length(cleaned_text)

        # Perform inference using the pipelines
        result = classifier(limited_text)
        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']

        summary = summarizer(cleaned_text, max_length=1000, min_length=30, do_sample=False)[0]['summary_text']

        output = {
            "predicted_label": predicted_label,
            "evaluation": predicted_probability,
            "summary": summary
        }
        # Return the result as a JSON-formatted string for the Textbox output
        return json.dumps(output, indent=3)

    except Exception as e:
        # Log the error details and surface them in the interface
        print(f"Error in predict_pptx_content: {e}")
        return json.dumps({"error": str(e)}, indent=3)
# Define the Gradio interface
iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    outputs=gr.Textbox(label="Output"),
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
)
# Launch the Gradio interface; share=True also creates a temporary public link
iface.launch(share=True)