NeerAbhy committed on
Commit
efe8c9c
·
verified ·
1 Parent(s): 40c7b63

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +15 -15
app.py CHANGED
@@ -21,16 +21,16 @@ summarization_model_names = [
21
 
22
  # Placeholder for the summarizer pipeline, tokenizer, and maximum tokens
23
  summarizer = None
24
- tokenizer = None
25
  max_tokens = None
26
 
27
 
28
  # Function to load the selected model
29
  def load_summarization_model(model_name):
30
- global summarizer, tokenizer, max_tokens
31
  try:
32
  summarizer = pipeline("summarization", model=model_name, torch_dtype=torch.bfloat16)
33
- tokenizer = AutoTokenizer.from_pretrained(model_name)
34
  config = AutoConfig.from_pretrained(model_name)
35
 
36
  if hasattr(config, 'max_position_embeddings'):
@@ -51,7 +51,7 @@ def summarize_text(input, min_length, max_length):
51
  if summarizer is None:
52
  return "No model loaded!"
53
 
54
- input_tokens = tokenizer.encode(input, return_tensors="pt")
55
  num_tokens = input_tokens.shape[1]
56
  if num_tokens > max_tokens:
57
  return f"Error: The input text has {num_tokens} tokens, which exceeds the maximum allowed {max_tokens} tokens. Please enter shorter text."
@@ -79,11 +79,9 @@ tokenizer = M2M100Tokenizer.from_pretrained(pretrained_model, cache_dir=cache_di
79
  translation_model = M2M100ForConditionalGeneration.from_pretrained(
80
  pretrained_model, cache_dir=cache_dir)
81
 
82
- transcription = pipeline("automatic-speech-recognition", model= "openai/whisper-base")
83
- clasification = pipeline(
84
- "audio-classification",
85
- model="anton-l/xtreme_s_xlsr_300m_minds14",
86
- )
87
  def language_names(json_path):
88
  with open(json_path, 'r') as json_file:
89
  data = json.load(json_file)
@@ -97,13 +95,13 @@ def get_name(label):
97
  name = label2name[iso_3]
98
  return name
99
 
100
- def audio_a_text(audio):
101
- text = transcription(audio)["text"]
102
- return text
103
 
104
- def text_to_sentimient(audio):
105
- #text = transcription(audio)["text"]
106
- return clasification(audio)
107
 
108
  lang_id = {
109
  "Afrikaans": "af",
@@ -229,6 +227,7 @@ with demo:
229
  text = gr.Textbox()
230
  #gr.Markdown("Speech analyzer")
231
  #audio = gr.Audio(type="filepath", label = "Upload a file")
 
232
  model_dropdown = gr.Dropdown(choices = summarization_model_names, label="Choose a model", value="sshleifer/distilbart-cnn-12-6")
233
  load_message = gr.Textbox(label="Load Status", interactive=False)
234
  b1 = gr.Button("Load Model")
@@ -249,6 +248,7 @@ with demo:
249
  # inputs=[
250
  # source_lang])
251
  #b1 = gr.Button("convert to text")
 
252
  b3 = gr.Button("translate")
253
  b3.click(translation_text, inputs = [source_lang, target_lang, text0], outputs = text)
254
  #b1.click(audio_a_text, inputs=audio, outputs=text)
 
21
 
22
  # Placeholder for the summarizer pipeline, tokenizer, and maximum tokens
23
  summarizer = None
24
+ tokenizer_sum = None
25
  max_tokens = None
26
 
27
 
28
  # Function to load the selected model
29
  def load_summarization_model(model_name):
30
+ global summarizer, tokenizer_sum, max_tokens
31
  try:
32
  summarizer = pipeline("summarization", model=model_name, torch_dtype=torch.bfloat16)
33
+ tokenizer_sum = AutoTokenizer.from_pretrained(model_name)
34
  config = AutoConfig.from_pretrained(model_name)
35
 
36
  if hasattr(config, 'max_position_embeddings'):
 
51
  if summarizer is None:
52
  return "No model loaded!"
53
 
54
+ input_tokens = tokenizer_sum.encode(input, return_tensors="pt")
55
  num_tokens = input_tokens.shape[1]
56
  if num_tokens > max_tokens:
57
  return f"Error: The input text has {num_tokens} tokens, which exceeds the maximum allowed {max_tokens} tokens. Please enter shorter text."
 
79
  translation_model = M2M100ForConditionalGeneration.from_pretrained(
80
  pretrained_model, cache_dir=cache_dir)
81
 
82
+ #transcription = pipeline("automatic-speech-recognition", model= "openai/whisper-base")
83
+ #clasification = pipeline("audio-classification",model="anton-l/xtreme_s_xlsr_300m_minds14",)
84
+
 
 
85
  def language_names(json_path):
86
  with open(json_path, 'r') as json_file:
87
  data = json.load(json_file)
 
95
  name = label2name[iso_3]
96
  return name
97
 
98
+ #def audio_a_text(audio):
99
+ # text = transcription(audio)["text"]
100
+ #return text
101
 
102
+ #def text_to_sentimient(audio):
103
+ # #text = transcription(audio)["text"]
104
+ # return clasification(audio)
105
 
106
  lang_id = {
107
  "Afrikaans": "af",
 
227
  text = gr.Textbox()
228
  #gr.Markdown("Speech analyzer")
229
  #audio = gr.Audio(type="filepath", label = "Upload a file")
230
+
231
  model_dropdown = gr.Dropdown(choices = summarization_model_names, label="Choose a model", value="sshleifer/distilbart-cnn-12-6")
232
  load_message = gr.Textbox(label="Load Status", interactive=False)
233
  b1 = gr.Button("Load Model")
 
248
  # inputs=[
249
  # source_lang])
250
  #b1 = gr.Button("convert to text")
251
+
252
  b3 = gr.Button("translate")
253
  b3.click(translation_text, inputs = [source_lang, target_lang, text0], outputs = text)
254
  #b1.click(audio_a_text, inputs=audio, outputs=text)