KG0101 committed on
Commit
7118bdc
·
verified ·
1 Parent(s): 1b1d1f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -53
app.py CHANGED
@@ -1,16 +1,13 @@
1
  import spaces
2
  import torch
3
  import gradio as gr
4
- import yt_dlp as youtube_dl
5
- from transformers import pipeline, AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
6
- from threading import Thread
7
- import tempfile
8
  import os
9
 
10
  MODEL_NAME = "openai/whisper-large-v3-turbo"
11
  BATCH_SIZE = 8
12
  FILE_LIMIT_MB = 1000
13
- YT_LENGTH_LIMIT_S = 3600 # limit to 1 hour YouTube files
14
 
15
  device = 0 if torch.cuda.is_available() else "cpu"
16
 
@@ -22,12 +19,16 @@ pipe = pipeline(
22
  device=device,
23
  )
24
 
25
- # Hugging Face Token for the LLM model
26
- HF_TOKEN = os.getenv("HF_TOKEN") # Make sure to set this in the environment variables
27
-
28
  # Load tokenizer and model for SOAP note generation
29
- tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-3-Llama-3.1-8B")
30
- model = AutoModelForCausalLM.from_pretrained("NousResearch/Hermes-3-Llama-3.1-8B", device_map="auto")
 
 
 
 
 
 
 
31
 
32
  # Prompt for SOAP note generation
33
  sys_prompt = "You are a world class clinical assistant."
@@ -50,50 +51,23 @@ def transcribe(inputs, task):
50
  text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
51
  return text
52
 
53
- # Function to download audio from YouTube
54
- def download_yt_audio(yt_url, filename):
55
- info_loader = youtube_dl.YoutubeDL()
56
- try:
57
- info = info_loader.extract_info(yt_url, download=False)
58
- except youtube_dl.utils.DownloadError as err:
59
- raise gr.Error(str(err))
60
-
61
- file_length_s = sum(x * int(t) for x, t in zip([3600, 60, 1], info["duration_string"].split(":")) if t.isdigit())
62
- if file_length_s > YT_LENGTH_LIMIT_S:
63
- raise gr.Error(f"Video too long. Maximum allowed duration is {YT_LENGTH_LIMIT_S / 60} minutes.")
64
-
65
- ydl_opts = {"outtmpl": filename, "format": "worstvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best"}
66
- with youtube_dl.YoutubeDL(ydl_opts) as ydl:
67
- ydl.download([yt_url])
68
-
69
- # Function to transcribe YouTube audio
70
- @spaces.GPU
71
- def yt_transcribe(yt_url, task):
72
- with tempfile.TemporaryDirectory() as tmpdirname:
73
- filepath = os.path.join(tmpdirname, "video.mp4")
74
- download_yt_audio(yt_url, filepath)
75
- with open(filepath, "rb") as f:
76
- inputs = f.read()
77
- inputs = pipe.feature_extractor.ffmpeg_read(inputs, pipe.feature_extractor.sampling_rate)
78
- inputs = {"array": inputs, "sampling_rate": pipe.feature_extractor.sampling_rate}
79
- text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
80
- return f'<iframe width="500" height="320" src="https://www.youtube.com/embed/{yt_url.split("?v=")[-1]}"> </iframe>', text
81
-
82
  # Function to generate SOAP notes using LLM
83
  def generate_soap(transcribed_text):
84
- prompt = f"{sys_prompt}\n\n{task_prompt}\n{transcribed_text}"
85
- inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
86
- outputs = model.generate(inputs, max_new_tokens=512)
87
- return tokenizer.decode(outputs[0], skip_special_tokens=True)
 
88
 
89
  # Gradio Interfaces for different inputs
90
  demo = gr.Blocks(theme=gr.themes.Ocean())
91
 
 
92
  mf_transcribe = gr.Interface(
93
  fn=transcribe,
94
  inputs=[gr.Audio(sources="microphone", type="filepath"), gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")],
95
  outputs="text",
96
- title="Whisper Large V3 Turbo: Transcribe Audio",
97
  description="Transcribe long-form microphone or audio inputs."
98
  )
99
 
@@ -101,16 +75,10 @@ file_transcribe = gr.Interface(
101
  fn=transcribe,
102
  inputs=[gr.Audio(sources="upload", type="filepath", label="Audio file"), gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")],
103
  outputs="text",
104
- title="Whisper Large V3: Transcribe Audio"
105
- )
106
-
107
- yt_transcribe = gr.Interface(
108
- fn=yt_transcribe,
109
- inputs=[gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"), gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")],
110
- outputs=["html", "text"],
111
- title="Whisper Large V3: Transcribe YouTube"
112
  )
113
 
 
114
  soap_note = gr.Interface(
115
  fn=generate_soap,
116
  inputs="text",
@@ -119,7 +87,10 @@ soap_note = gr.Interface(
119
  description="Convert transcribed conversation to a clinical SOAP note with structured sections (Subjective, Objective, Assessment, Plan)."
120
  )
121
 
 
122
  with demo:
123
- gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe, soap_note], ["Microphone", "Audio file", "YouTube", "SOAP Note"])
 
 
124
 
125
  demo.queue().launch(ssr_mode=False)
 
1
  import spaces
2
  import torch
3
  import gradio as gr
4
+ from transformers import AutoTokenizer, LlamaForCausalLM
5
+ import bitsandbytes, flash_attn
 
 
6
  import os
7
 
8
  MODEL_NAME = "openai/whisper-large-v3-turbo"
9
  BATCH_SIZE = 8
10
  FILE_LIMIT_MB = 1000
 
11
 
12
  device = 0 if torch.cuda.is_available() else "cpu"
13
 
 
19
  device=device,
20
  )
21
 
 
 
 
22
  # Load tokenizer and model for SOAP note generation
23
+ tokenizer = AutoTokenizer.from_pretrained("NousResearch/Hermes-3-Llama-3.1-8B", trust_remote_code=True)
24
+ model = LlamaForCausalLM.from_pretrained(
25
+ "NousResearch/Hermes-3-Llama-3.1-8B",
26
+ torch_dtype=torch.float16,
27
+ device_map="auto",
28
+ load_in_8bit=False,
29
+ load_in_4bit=True,
30
+ use_flash_attention_2=True
31
+ )
32
 
33
  # Prompt for SOAP note generation
34
  sys_prompt = "You are a world class clinical assistant."
 
51
  text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
52
  return text
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
  # Function to generate SOAP notes using LLM
55
  def generate_soap(transcribed_text):
56
+ prompt = f"<|im_start|>system\n{sys_prompt}<|im_end|>\n<|im_start|>user\n{task_prompt}\n{transcribed_text}<|im_end|>\n<|im_start|>assistant"
57
+ input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to("cuda")
58
+ generated_ids = model.generate(input_ids, max_new_tokens=2048, temperature=0.8, repetition_penalty=1.1, do_sample=True, eos_token_id=tokenizer.eos_token_id)
59
+ response = tokenizer.decode(generated_ids[0][input_ids.shape[-1]:], skip_special_tokens=True, clean_up_tokenization_space=True)
60
+ return response
61
 
62
  # Gradio Interfaces for different inputs
63
  demo = gr.Blocks(theme=gr.themes.Ocean())
64
 
65
+ # Interface for microphone or file transcription
66
  mf_transcribe = gr.Interface(
67
  fn=transcribe,
68
  inputs=[gr.Audio(sources="microphone", type="filepath"), gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")],
69
  outputs="text",
70
+ title="Audio Transcribe",
71
  description="Transcribe long-form microphone or audio inputs."
72
  )
73
 
 
75
  fn=transcribe,
76
  inputs=[gr.Audio(sources="upload", type="filepath", label="Audio file"), gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")],
77
  outputs="text",
78
+ title="Audio Transcribe"
 
 
 
 
 
 
 
79
  )
80
 
81
+ # SOAP Note generation interface
82
  soap_note = gr.Interface(
83
  fn=generate_soap,
84
  inputs="text",
 
87
  description="Convert transcribed conversation to a clinical SOAP note with structured sections (Subjective, Objective, Assessment, Plan)."
88
  )
89
 
90
+ # Tabbed interface integrating SOAP note below transcription
91
  with demo:
92
+ with gr.TabbedInterface([mf_transcribe, file_transcribe], ["Microphone", "Audio file"]) as transcribe_tab:
93
+ transcribe_tab.outputs[0] # Output from transcription feeds directly to SOAP note
94
+ soap_note # SOAP note interface placed directly below transcription output
95
 
96
  demo.queue().launch(ssr_mode=False)