yellowcandle committed on
Commit
d1c3a70
1 Parent(s): f6b2f01

fix errors

Browse files
Files changed (1) hide show
  1. app.py +10 -5
app.py CHANGED
@@ -8,6 +8,8 @@ from transformers import AutoModelForSpeechSeq2Seq, AutoProcessor, pipeline, Aut
8
  def transcribe_audio(audio, model_id):
9
  if audio is None:
10
  return "Please upload an audio file."
 
 
11
 
12
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
13
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
@@ -34,21 +36,24 @@ def transcribe_audio(audio, model_id):
34
  result = pipe(audio)
35
  return result["text"]
36
 
37
- @spaces.GPU(duration=60)
38
- def proofread(text):
39
  if text is None:
40
  return "Please provide the transcribed text for proofreading."
41
 
42
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
43
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
44
 
 
 
45
  model = AutoModelForCausalLM.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
 
46
  model.to(device)
47
 
48
  # Perform proofreading using the model
49
- input_ids = model.tokenizer.encode(text, return_tensors="pt").to(device)
50
  output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
51
- proofread_text = model.tokenizer.decode(output[0], skip_special_tokens=True)
52
 
53
  return proofread_text
54
 
@@ -59,7 +64,7 @@ with gr.Blocks() as demo:
59
 
60
  with gr.Row():
61
  audio = gr.Audio(sources="upload", type="filepath")
62
- model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"])
63
 
64
  transcribe_button = gr.Button("Transcribe")
65
  transcribed_text = gr.Textbox(label="Transcribed Text")
 
8
  def transcribe_audio(audio, model_id):
9
  if audio is None:
10
  return "Please upload an audio file."
11
+ if model_id is None:
12
+ return "Please select a model."
13
 
14
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
15
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
 
36
  result = pipe(audio)
37
  return result["text"]
38
 
39
+ @spaces.GPU(duration=180)
40
+ def proofread(prompt, text):
41
  if text is None:
42
  return "Please provide the transcribed text for proofreading."
43
 
44
  device = "cuda:0" if torch.cuda.is_available() else "cpu"
45
  torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
46
 
47
+ prompt = "用繁體中文整理這段文字,在最後加上整段文字的重點。"
48
+
49
  model = AutoModelForCausalLM.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
50
+ tokenizer = AutoTokenizer.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
51
  model.to(device)
52
 
53
  # Perform proofreading using the model
54
+ input_ids = tokenizer.encode(text, return_tensors="pt").to(device)
55
  output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
56
+ proofread_text = tokenizer.decode(output[0], skip_special_tokens=True)
57
 
58
  return proofread_text
59
 
 
64
 
65
  with gr.Row():
66
  audio = gr.Audio(sources="upload", type="filepath")
67
+ model_dropdown = gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"], value="openai/whisper-large-v3")
68
 
69
  transcribe_button = gr.Button("Transcribe")
70
  transcribed_text = gr.Textbox(label="Transcribed Text")