Ahmed235 committed on
Commit
d6ff263
·
verified ·
1 Parent(s): 48b97ad

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -6
app.py CHANGED
@@ -1,6 +1,6 @@
1
  import gradio as gr
2
  from transformers import pipeline
3
- from pptx import Presentation # Import the Presentation class
4
  import re
5
 
6
  # Create a text classification pipeline
@@ -16,20 +16,28 @@ def extract_text_from_pptx(file_path):
16
  text.append(shape.text)
17
  return "\n".join(text)
18
 
 
 
 
 
19
  def predict_pptx_content(file_path):
20
  try:
21
  extracted_text = extract_text_from_pptx(file_path)
22
  cleaned_text = re.sub(r'\s+', ' ', extracted_text)
 
 
 
 
23
  # Perform inference using the pipeline
24
- result = classifier(extracted_text)
25
 
26
  predicted_label = result[0]['label']
27
  predicted_probability = result[0]['score']
28
- summary = summarizer(extracted_text, max_length=222, min_length=30, do_sample=False)[0]['summary_text']
29
  prediction = {
30
  "Summary": summary,
31
  "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
32
- "Predicted_Label": predicted_label, # Adjusted key to match Gradio output key
33
  }
34
 
35
  return prediction
@@ -43,8 +51,8 @@ def predict_pptx_content(file_path):
43
  iface = gr.Interface(
44
  fn=predict_pptx_content,
45
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
46
- outputs=["text"], # Adjusted output keys
47
- live=False, # Change to True for one-time analysis
48
  title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
49
  )
50
 
 
1
  import gradio as gr
2
  from transformers import pipeline
3
+ from pptx import Presentation
4
  import re
5
 
6
  # Create a text classification pipeline
 
16
  text.append(shape.text)
17
  return "\n".join(text)
18
 
19
def limit_text_length(text, max_length=512):
    """Return at most the first ``max_length`` characters of ``text``.

    The cut is made on characters, not on model tokens, so this is only a
    coarse cap on the size of the input.

    Args:
        text: The string to shorten.
        max_length: Maximum number of characters to keep. Values below zero
            are treated as zero.

    Returns:
        ``text`` unchanged when it is already short enough, otherwise its
        leading ``max_length`` characters.
    """
    # A negative bound would make the slice drop characters from the end
    # (``text[:-n]``) instead of capping the length, so clamp it to zero.
    return text[:max(max_length, 0)]
22
+
23
  def predict_pptx_content(file_path):
24
  try:
25
  extracted_text = extract_text_from_pptx(file_path)
26
  cleaned_text = re.sub(r'\s+', ' ', extracted_text)
27
+
28
+ # Limit text length before classification
29
+ limited_text = limit_text_length(cleaned_text)
30
+
31
  # Perform inference using the pipeline
32
+ result = classifier(limited_text)
33
 
34
  predicted_label = result[0]['label']
35
  predicted_probability = result[0]['score']
36
+ summary = summarizer(extracted_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']
37
  prediction = {
38
  "Summary": summary,
39
  "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
40
+ "Predicted_Label": predicted_label,
41
  }
42
 
43
  return prediction
 
51
  iface = gr.Interface(
52
  fn=predict_pptx_content,
53
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
54
+ outputs=[gr.Textbox("Summary"), gr.Textbox("Evaluation"), gr.Textbox("Predicted_Label")],
55
+ live=False,
56
  title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
57
  )
58