yellowcandle committed on
Commit
e648c2d
1 Parent(s): c7d8815

added proofread function

Browse files
Files changed (1) hide show
  1. app.py +30 -3
app.py CHANGED
@@ -35,10 +35,37 @@ def transcribe_audio(audio, model_id):
35
  result = pipe(audio)
36
  return result["text"]
37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
 
39
- demo = gr.Interface(fn=transcribe_audio,
40
- inputs=[gr.Audio(sources="upload", type="filepath"), gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"])],
41
- outputs="text")
 
 
 
 
 
 
 
 
 
42
  demo.launch()
43
 
44
 
 
35
  result = pipe(audio)
36
  return result["text"]
37
 
38
+ @spaces.GPU(duration=60)
39
+ def proofread(text):
40
+ if text is None:
41
+ return "Please provide the transcribed text for proofreading."
42
+
43
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
44
+ torch_dtype = torch.float16 if torch.cuda.is_available() else torch.float32
45
+
46
+ model = AutoModelForCausalLM.from_pretrained("hfl/llama-3-chinese-8b-instruct-v3")
47
+ model.to(device)
48
+
49
+ # Perform proofreading using the model
50
+ input_ids = model.tokenizer.encode(text, return_tensors="pt").to(device)
51
+ output = model.generate(input_ids, max_length=len(input_ids[0])+50, num_return_sequences=1, temperature=0.7)
52
+ proofread_text = model.tokenizer.decode(output[0], skip_special_tokens=True)
53
+
54
+ return proofread_text
55
+
56
 
57
+ demo = gr.Interface(
58
+ [transcribe_audio, proofread],
59
+ [
60
+ gr.Audio(sources="upload", type="filepath"),
61
+ gr.Dropdown(choices=["openai/whisper-large-v3", "alvanlii/whisper-small-cantonese"]),
62
+ "text"
63
+ ],
64
+ "text",
65
+ allow_flagging="never",
66
+ title="Audio Transcription and Proofreading",
67
+ description="Upload an audio file, select a model for transcription, and then proofread the transcribed text.",
68
+ )
69
  demo.launch()
70
 
71