File size: 2,498 Bytes
44e21a6 f4067be 341ccc1 f4067be 44e21a6 2bb61b8 06bc437 2bb61b8 12b0ed7 f4067be 06bc437 f4067be 341ccc1 1aa90a2 f4067be e0cbe77 f4067be 1aa90a2 f4067be 1aa90a2 f4067be 1aa90a2 f4067be 367a8a1 f4067be 12b0ed7 7044543 f4067be 12b0ed7 6e4c777 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
from pptx import Presentation
import re
import gradio as gr
from transformers import AutoModelForSequenceClassification, AutoTokenizer
import torch
import torch.nn.functional as F
from transformers import pipeline
# All inference in this script runs on the CPU.
device = torch.device("cpu")

# Classification checkpoint: the tokenizer and the model weights are loaded
# from the same Hugging Face repository, and the model is placed on `device`.
tokenizer = AutoTokenizer.from_pretrained("Ahmed235/roberta_classification")
model = AutoModelForSequenceClassification.from_pretrained(
    "Ahmed235/roberta_classification"
).to(device)

# Summarization pipeline, backed by a separate checkpoint.
summarizer = pipeline(task="summarization", model="Falconsai/text_summarization")
def extract_text_from_pptx(file_path):
    """Collect the text of every text-bearing shape in a PowerPoint file.

    Args:
        file_path: Location of the .pptx file, passed unchanged to
            ``Presentation``.

    Returns:
        The ``text`` of each shape that exposes one, in slide order and then
        shape order, joined with newlines. An empty string when no shape
        exposes text.
    """
    presentation = Presentation(file_path)
    texts = []
    for slide in presentation.slides:
        # Not every shape exposes ``text``; those that do not are skipped.
        texts.extend(
            shape.text for shape in slide.shapes if hasattr(shape, "text")
        )
    return "\n".join(texts)
def predict_pptx_content(file_path):
    """Classify and summarize the text of a PowerPoint file.

    Args:
        file_path: Location of the .pptx file; forwarded to
            ``extract_text_from_pptx``.

    Returns:
        A dict with three entries: ``"Predicted Label"`` (the class name
        looked up in ``model.config.id2label``), ``"Evaluation"`` (a sentence
        reporting the softmax probability of that class) and ``"Summary"``
        (the summarizer's output for the same cleaned text).
    """
    raw_text = extract_text_from_pptx(file_path)
    # Collapse every run of whitespace (newlines included) into one space.
    cleaned_text = re.sub(r'\s+', ' ', raw_text)

    # Encode with truncation enabled and place the tensors on the model's device.
    encoded = tokenizer(
        cleaned_text, truncation=True, padding=True, return_tensors="pt"
    )
    model_inputs = {name: tensor.to(device) for name, tensor in encoded.items()}

    # Forward pass only; gradient tracking is switched off.
    with torch.no_grad():
        logits = model(**model_inputs).logits

    # The winning class is picked from the raw logits; its probability is
    # read from the softmax over the class dimension.
    class_id = logits.argmax(dim=1).item()
    label = model.config.id2label[class_id]
    confidence = F.softmax(logits, dim=1)[0][class_id].item()

    # Summarize the same cleaned text with deterministic decoding.
    summaries = summarizer(
        cleaned_text, max_length=80, min_length=30, do_sample=False
    )
    summary = summaries[0]['summary_text']

    return {
        "Predicted Label": label,
        "Evaluation": f"Evaluate the topic according to {label} is: {confidence}",
        "Summary": summary,
    }
# Define the Gradio interface
def _prediction_to_outputs(file_path):
    """Run the analysis and unpack the result for the three output boxes.

    ``predict_pptx_content`` returns a single dict, while an interface that
    declares three output components needs three values in component order.

    Args:
        file_path: Path of the uploaded file as supplied by the file input.

    Returns:
        A ``(label, evaluation, summary)`` tuple taken from the prediction dict.
    """
    prediction = predict_pptx_content(file_path)
    return (
        prediction["Predicted Label"],
        prediction["Evaluation"],
        prediction["Summary"],
    )


iface = gr.Interface(
    fn=_prediction_to_outputs,
    inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
    # One text box per field of the prediction, in the order returned above.
    outputs=[
        gr.Textbox(label="Predicted Label"),
        gr.Textbox(label="Evaluation"),
        gr.Textbox(label="Summary"),
    ],
    # Run only when the user submits; True would re-run on every input change.
    live=False,
    title="<h1 style='color: lightgreen; text-align: center;'>PPTX Analyzer</h1>",
)

# Deploy the Gradio interface
iface.launch(share=True)
|