|
|
|
|
|
from pptx import Presentation |
|
import re |
|
from transformers import pipeline |
|
|
|
def extract_text_from_pptx(file_path):
    """Return the text of every text-bearing shape in a PowerPoint file.

    Slides are visited in presentation order and, within each slide, shapes
    are visited in the order ``slide.shapes`` yields them. Shapes that do
    not expose a ``text`` attribute are skipped.

    Args:
        file_path: Location of the ``.pptx`` file; passed unchanged to
            ``pptx.Presentation``.

    Returns:
        The collected shape texts joined with newlines, or an empty string
        when no shape carries text.
    """
    presentation = Presentation(file_path)

    texts = []
    # The slide index was never used, so iterate the slides directly.
    for slide in presentation.slides:
        texts.extend(
            shape.text for shape in slide.shapes if hasattr(shape, "text")
        )

    return "\n".join(texts)
|
|
|
def main(file_path="path/to/your/powerpoint.pptx"):
    """Extract, classify and summarize the text of a PowerPoint file.

    Prints, in order: the whitespace-normalized slide text, the label
    predicted by the text-classification pipeline, that label's score, and
    the raw output of the summarization pipeline.

    Args:
        file_path: Presentation to process. Defaults to the placeholder path
            that used to be hard-coded, so calling ``main()`` with no
            arguments behaves as before.
    """
    extracted_text = extract_text_from_pptx(file_path)
    # Collapse every run of whitespace (including the newlines that separate
    # shapes) into a single space so the models receive one flat string.
    cleaned_text = re.sub(r'\s+', ' ', extracted_text)

    print(cleaned_text)

    # Nothing to classify or summarize: skip loading the models entirely.
    if not cleaned_text.strip():
        print("No text found in the presentation.")
        return

    classifier = pipeline("text-classification", model="Ahmed235/roberta_classification")
    summarizer = pipeline("summarization", model="Falconsai/text_summarization")

    # A full deck can exceed a model's maximum input length; ask the
    # tokenizer to truncate instead of letting the forward pass fail.
    result = classifier(cleaned_text, truncation=True)[0]
    predicted_label = result['label']
    predicted_probability = result['score']

    print("Predicted Label:", predicted_label)
    print(f"Evaluate the topic according to {predicted_label} is: {predicted_probability}")
    print(
        summarizer(
            cleaned_text,
            max_length=80,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
    )
|
|
|
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|