Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -21,16 +21,16 @@ summarization_model_names = [
|
|
21 |
|
22 |
# Placeholder for the summarizer pipeline, tokenizer, and maximum tokens
|
23 |
summarizer = None
|
24 |
-
|
25 |
max_tokens = None
|
26 |
|
27 |
|
28 |
# Function to load the selected model
|
29 |
def load_summarization_model(model_name):
|
30 |
-
global summarizer,
|
31 |
try:
|
32 |
summarizer = pipeline("summarization", model=model_name, torch_dtype=torch.bfloat16)
|
33 |
-
|
34 |
config = AutoConfig.from_pretrained(model_name)
|
35 |
|
36 |
if hasattr(config, 'max_position_embeddings'):
|
@@ -51,7 +51,7 @@ def summarize_text(input, min_length, max_length):
|
|
51 |
if summarizer is None:
|
52 |
return "No model loaded!"
|
53 |
|
54 |
-
input_tokens =
|
55 |
num_tokens = input_tokens.shape[1]
|
56 |
if num_tokens > max_tokens:
|
57 |
return f"Error: The input text has {num_tokens} tokens, which exceeds the maximum allowed {max_tokens} tokens. Please enter shorter text."
|
@@ -79,11 +79,9 @@ tokenizer = M2M100Tokenizer.from_pretrained(pretrained_model, cache_dir=cache_di
|
|
79 |
translation_model = M2M100ForConditionalGeneration.from_pretrained(
|
80 |
pretrained_model, cache_dir=cache_dir)
|
81 |
|
82 |
-
transcription = pipeline("automatic-speech-recognition", model= "openai/whisper-base")
|
83 |
-
clasification = pipeline(
|
84 |
-
|
85 |
-
model="anton-l/xtreme_s_xlsr_300m_minds14",
|
86 |
-
)
|
87 |
def language_names(json_path):
|
88 |
with open(json_path, 'r') as json_file:
|
89 |
data = json.load(json_file)
|
@@ -97,13 +95,13 @@ def get_name(label):
|
|
97 |
name = label2name[iso_3]
|
98 |
return name
|
99 |
|
100 |
-
def audio_a_text(audio):
|
101 |
-
|
102 |
-
return text
|
103 |
|
104 |
-
def text_to_sentimient(audio):
|
105 |
-
|
106 |
-
|
107 |
|
108 |
lang_id = {
|
109 |
"Afrikaans": "af",
|
@@ -229,6 +227,7 @@ with demo:
|
|
229 |
text = gr.Textbox()
|
230 |
#gr.Markdown("Speech analyzer")
|
231 |
#audio = gr.Audio(type="filepath", label = "Upload a file")
|
|
|
232 |
model_dropdown = gr.Dropdown(choices = summarization_model_names, label="Choose a model", value="sshleifer/distilbart-cnn-12-6")
|
233 |
load_message = gr.Textbox(label="Load Status", interactive=False)
|
234 |
b1 = gr.Button("Load Model")
|
@@ -249,6 +248,7 @@ with demo:
|
|
249 |
# inputs=[
|
250 |
# source_lang])
|
251 |
#b1 = gr.Button("convert to text")
|
|
|
252 |
b3 = gr.Button("translate")
|
253 |
b3.click(translation_text, inputs = [source_lang, target_lang, text0], outputs = text)
|
254 |
#b1.click(audio_a_text, inputs=audio, outputs=text)
|
|
|
21 |
|
22 |
# Placeholder for the summarizer pipeline, tokenizer, and maximum tokens
|
23 |
summarizer = None
|
24 |
+
tokenizer_sum = None
|
25 |
max_tokens = None
|
26 |
|
27 |
|
28 |
# Function to load the selected model
|
29 |
def load_summarization_model(model_name):
|
30 |
+
global summarizer, tokenizer_sum, max_tokens
|
31 |
try:
|
32 |
summarizer = pipeline("summarization", model=model_name, torch_dtype=torch.bfloat16)
|
33 |
+
tokenizer_sum = AutoTokenizer.from_pretrained(model_name)
|
34 |
config = AutoConfig.from_pretrained(model_name)
|
35 |
|
36 |
if hasattr(config, 'max_position_embeddings'):
|
|
|
51 |
if summarizer is None:
|
52 |
return "No model loaded!"
|
53 |
|
54 |
+
input_tokens = tokenizer_sum.encode(input, return_tensors="pt")
|
55 |
num_tokens = input_tokens.shape[1]
|
56 |
if num_tokens > max_tokens:
|
57 |
return f"Error: The input text has {num_tokens} tokens, which exceeds the maximum allowed {max_tokens} tokens. Please enter shorter text."
|
|
|
79 |
translation_model = M2M100ForConditionalGeneration.from_pretrained(
|
80 |
pretrained_model, cache_dir=cache_dir)
|
81 |
|
82 |
+
#transcription = pipeline("automatic-speech-recognition", model= "openai/whisper-base")
|
83 |
+
#clasification = pipeline("audio-classification",model="anton-l/xtreme_s_xlsr_300m_minds14",)
|
84 |
+
|
|
|
|
|
85 |
def language_names(json_path):
|
86 |
with open(json_path, 'r') as json_file:
|
87 |
data = json.load(json_file)
|
|
|
95 |
name = label2name[iso_3]
|
96 |
return name
|
97 |
|
98 |
+
#def audio_a_text(audio):
|
99 |
+
# text = transcription(audio)["text"]
|
100 |
+
#return text
|
101 |
|
102 |
+
#def text_to_sentimient(audio):
|
103 |
+
# #text = transcription(audio)["text"]
|
104 |
+
# return clasification(audio)
|
105 |
|
106 |
lang_id = {
|
107 |
"Afrikaans": "af",
|
|
|
227 |
text = gr.Textbox()
|
228 |
#gr.Markdown("Speech analyzer")
|
229 |
#audio = gr.Audio(type="filepath", label = "Upload a file")
|
230 |
+
|
231 |
model_dropdown = gr.Dropdown(choices = summarization_model_names, label="Choose a model", value="sshleifer/distilbart-cnn-12-6")
|
232 |
load_message = gr.Textbox(label="Load Status", interactive=False)
|
233 |
b1 = gr.Button("Load Model")
|
|
|
248 |
# inputs=[
|
249 |
# source_lang])
|
250 |
#b1 = gr.Button("convert to text")
|
251 |
+
|
252 |
b3 = gr.Button("translate")
|
253 |
b3.click(translation_text, inputs = [source_lang, target_lang, text0], outputs = text)
|
254 |
#b1.click(audio_a_text, inputs=audio, outputs=text)
|