"""Gradio app: classify and summarize the text content of a PowerPoint file."""

import re

import gradio as gr
from pptx import Presentation
from transformers import pipeline

# Load both pipelines once at module import so every request reuses them
# instead of re-downloading / re-initializing the models per call.
classifier = pipeline(
    "text-classification",
    model="Ahmed235/roberta_classification",
    tokenizer="Ahmed235/roberta_classification",
)
summarizer = pipeline("summarization", model="Falconsai/text_summarization")


def extract_text_from_pptx(file_path):
    """Return the text of every shape on every slide, joined with newlines.

    Parameters
    ----------
    file_path : str
        Path to a .pptx file readable by python-pptx.
    """
    presentation = Presentation(file_path)
    text = []
    for slide in presentation.slides:
        for shape in slide.shapes:
            # Not every shape carries text (pictures, charts, ...), so guard.
            if hasattr(shape, "text"):
                text.append(shape.text)
    return "\n".join(text)


def limit_text_length(text, max_length=512):
    """Truncate *text* to at most *max_length* characters.

    NOTE(review): this truncates by characters, not tokens — it only
    approximates the classifier's token limit. Confirm whether the
    pipeline's own truncation should be used instead.
    """
    return text[:max_length]


def predict_pptx_content(file_path):
    """Classify and summarize the text of an uploaded .pptx file.

    Returns a dict with the predicted label, an evaluation string carrying
    the label's score, and a short summary. On any failure the error is
    logged and returned as ``{"error": <message>}`` so the UI shows it
    instead of crashing.
    """
    try:
        extracted_text = extract_text_from_pptx(file_path)
        # Collapse runs of whitespace (incl. newlines) into single spaces.
        cleaned_text = re.sub(r'\s+', ' ', extracted_text)

        # Limit text length before classification.
        limited_text = limit_text_length(cleaned_text)

        result = classifier(limited_text)
        predicted_label = result[0]['label']
        predicted_probability = result[0]['score']

        # The summarizer sees the full (untruncated) extracted text.
        summary = summarizer(
            extracted_text, max_length=80, min_length=30, do_sample=False
        )[0]['summary_text']

        prediction = {
            "Predicted_Label": f"{predicted_label}\n",
            "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}\n",
            "Summary": summary,
        }
        return prediction
    except Exception as e:
        # Top-level boundary: log the error details and surface them to the UI.
        print(f"Error in predict_pptx_content: {e}")
        return {"error": str(e)}


# Define the Gradio interface
iface = gr.Interface(
    fn=predict_pptx_content,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    outputs=["text"],
    live=False,
    title="\n\nHackTalk Analyzer\n\n",
)

# Deploy the Gradio interface
iface.launch(share=True)