Ahmed235 committed on
Commit
98c0f54
·
verified ·
1 Parent(s): 8cb1867

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +27 -9
app.py CHANGED
@@ -1,10 +1,15 @@
1
- import gradio as gr
2
  from pptx import Presentation
3
  import re
4
- from transformers import pipeline
 
 
 
5
 
6
- # Create a summarization pipeline
7
- summarizer = pipeline("summarization", model="Falconsai/text_summarization")
 
 
 
8
 
9
  def extract_text_from_pptx(file_path):
10
  presentation = Presentation(file_path)
@@ -20,11 +25,24 @@ def predict_pptx_content(file_path):
20
  extracted_text = extract_text_from_pptx(file_path)
21
  cleaned_text = re.sub(r'\s+', ' ', extracted_text)
22
 
23
- # Summarize the cleaned text
24
- summary = summarizer(cleaned_text, max_length=80, min_length=30, do_sample=False)[0]['summary_text']
 
 
 
 
 
 
 
 
 
 
 
 
25
 
26
  prediction = {
27
- "Summary": summary
 
28
  }
29
 
30
  return prediction
@@ -38,10 +56,10 @@ def predict_pptx_content(file_path):
38
  iface = gr.Interface(
39
  fn=predict_pptx_content,
40
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
41
- outputs="text", # Only output the summary
42
  live=False, # Set to True to re-run automatically whenever the input changes
43
  title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
44
  )
45
 
46
  # Deploy the Gradio interface
47
- iface.launch(share=True)
 
 
1
  from pptx import Presentation
2
  import re
3
+ import gradio as gr
4
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
5
+ import torch
6
+ import torch.nn.functional as F
7
 
8
+ # Load the pre-trained model and tokenizer
9
+ tokenizer = AutoTokenizer.from_pretrained("Ahmed235/roberta_classification")
10
+ model = AutoModelForSequenceClassification.from_pretrained("Ahmed235/roberta_classification")
11
+ device = torch.device("cpu")
12
+ model = model.to(device) # Move the model to the CPU
13
 
14
  def extract_text_from_pptx(file_path):
15
  presentation = Presentation(file_path)
 
25
  extracted_text = extract_text_from_pptx(file_path)
26
  cleaned_text = re.sub(r'\s+', ' ', extracted_text)
27
 
28
+ # Tokenize and encode the cleaned text
29
+ input_encoding = tokenizer(cleaned_text, truncation=True, padding=True, return_tensors="pt")
30
+ input_encoding = {key: val.to(device) for key, val in input_encoding.items()} # Move input tensor to CPU
31
+
32
+ # Perform inference
33
+ with torch.no_grad():
34
+ outputs = model(**input_encoding)
35
+ logits = outputs.logits
36
+
37
+ probabilities = F.softmax(logits, dim=1)
38
+
39
+ predicted_label_id = torch.argmax(logits, dim=1).item()
40
+ predicted_label = model.config.id2label[predicted_label_id]
41
+ predicted_probability = probabilities[0][predicted_label_id].item()
42
 
43
  prediction = {
44
+ "Predicted Label": predicted_label,
45
+ "Evaluation": f"Evaluate the topic according to {predicted_label} is: {predicted_probability}",
46
  }
47
 
48
  return prediction
 
56
  iface = gr.Interface(
57
  fn=predict_pptx_content,
58
  inputs=gr.File(type="filepath", label="Upload PowerPoint (.pptx) file"),
59
+ outputs=["text", "text"], # Predicted Label, Evaluation
60
  live=False, # Set to True to re-run automatically whenever the input changes
61
  title="<h1 style='color: lightgreen; text-align: center;'>HackTalk Analyzer</h1>",
62
  )
63
 
64
  # Deploy the Gradio interface
65
+ iface.launch(share=True)