Ahmed235 committed on
Commit
f4067be
·
verified ·
1 Parent(s): 6e4c777

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +34 -27
app.py CHANGED
@@ -1,55 +1,62 @@
1
  from pptx import Presentation
2
  import re
3
- from transformers import pipeline
4
  import gradio as gr
 
 
 
 
 
 
 
 
 
 
 
5
 
6
  def extract_text_from_pptx(file_path):
7
  presentation = Presentation(file_path)
8
-
9
  text = []
10
  for slide_number, slide in enumerate(presentation.slides, start=1):
11
  for shape in slide.shapes:
12
  if hasattr(shape, "text"):
13
  text.append(shape.text)
14
-
15
  return "\n".join(text)
16
 
17
-
18
  def predict_pptx_content(file_path):
19
- print(f"File path received: {file_path}")
20
- try:
21
- extracted_text = extract_text_from_pptx(file_path)
22
- print(f"Extracted text: {extracted_text}")
23
 
24
- cleaned_text = re.sub(r'\s+', ' ', extracted_text)
25
- print(f"Cleaned text: {cleaned_text}")
26
 
27
- classifier = pipeline("text-classification", model="Ahmed235/roberta_classification")
 
 
 
28
 
29
- # summarizer = pipeline("summarization", model="Falconsai/text_summarization")
30
 
31
- result = classifier(cleaned_text)[0]
32
- predicted_label = result['label']
33
- predicted_probability = result['score']
34
 
35
- prediction = {
36
- "Predicted Label": predicted_label,
37
- "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}"
38
- # "Summary": summarizer(cleaned_text, max_length=80, min_length=30, do_sample=False)
39
- }
40
 
41
- return prediction
42
- except Exception as e:
43
- print(f"Error processing file: {e}")
44
- return {"error": str(e)}
 
45
 
 
46
 
47
  # Define the Gradio interface
48
  iface = gr.Interface(
49
  fn=predict_pptx_content,
50
- inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
51
- outputs=["text", "text"], # Predicted Label, Evaluation, Summary
52
- live=False, # Change to False for one-time analysis
53
  title="<h1 style='color: lightgreen; text-align: center;'>PPTX Analyzer</h1>",
54
  )
55
 
 
1
  from pptx import Presentation
2
  import re
 
3
  import gradio as gr
4
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
+ import torch
6
+ import torch.nn.functional as F
7
+ from transformers import pipeline
8
+
9
# The tokenizer and the classification model are loaded from the same
# checkpoint, once at import time.
_CLASSIFIER_CHECKPOINT = "Ahmed235/roberta_classification"
tokenizer = AutoTokenizer.from_pretrained(_CLASSIFIER_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(_CLASSIFIER_CHECKPOINT)

# Summarization pipeline, also created once and reused by every request.
summarizer = pipeline("summarization", model="Falconsai/text_summarization")
15
 
16
  def extract_text_from_pptx(file_path):
17
  presentation = Presentation(file_path)
 
18
  text = []
19
  for slide_number, slide in enumerate(presentation.slides, start=1):
20
  for shape in slide.shapes:
21
  if hasattr(shape, "text"):
22
  text.append(shape.text)
 
23
  return "\n".join(text)
24
 
 
25
  def predict_pptx_content(file_path):
26
+ extracted_text = extract_text_from_pptx(file_path)
27
+ cleaned_text = re.sub(r'\s+', ' ', extracted_text)
 
 
28
 
29
+ # Tokenize and encode the cleaned text
30
+ input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")
31
 
32
+ # Perform inference
33
+ with torch.no_grad():
34
+ outputs = model(**input_encoding)
35
+ logits = outputs.logits
36
 
37
+ probabilities = F.softmax(logits, dim=1)
38
 
39
+ predicted_label_id = torch.argmax(logits, dim=1).item()
40
+ predicted_label = model.config.id2label[predicted_label_id]
41
+ predicted_probability = probabilities[0][predicted_label_id].item()
42
 
43
+ # Summarize the cleaned text
44
+ summary = summarizer(cleaned_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']
 
 
 
45
 
46
+ prediction = {
47
+ "Predicted Label": predicted_label,
48
+ "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
49
+ "Summary": summary
50
+ }
51
 
52
+ return prediction
53
 
54
  # Define the Gradio interface
55
  iface = gr.Interface(
56
  fn=predict_pptx_content,
57
+ inputs=gr.File(type="file", label="Upload PowerPoint (.pptx) file"),
58
+ outputs=["text", "text", "text"], # Predicted Label, Evaluation, Summary
59
+ live=False, # Change to True for one-time analysis
60
  title="<h1 style='color: lightgreen; text-align: center;'>PPTX Analyzer</h1>",
61
  )
62