import os
import tempfile
import numpy as np
import torch
import gradio as gr
from pydub import AudioSegment
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from transformers.pipelines.audio_utils import ffmpeg_read
from sentence_transformers import SentenceTransformer, util
import spacy
import spacy.cli

# Download the spaCy model only if it is not already installed
try:
    spacy.load("en_core_web_sm")
except OSError:
    spacy.cli.download("en_core_web_sm")
# Constants
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000  # currently unused; reserved as an upload-size limit
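# transformers pipelines accept an integer GPU index (e.g. 0) or the string "cpu"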
device = 0 if torch.cuda.is_available() else "cpu"
# Whisper pipeline
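# chunk_length_s=30 splits long recordings into 30-second windows so the
# pipeline can transcribe audio beyond Whisper's native 30-second context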
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)
# NLP model and other helpers
nlp = spacy.load("en_core_web_sm")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Summarization model (the tokenizer and model are used directly below so that
# generation lengths can be controlled per section)
summarizer_model_name = "Mahalingam/DistilBart-Med-Summary"
tokenizer = AutoTokenizer.from_pretrained(summarizer_model_name)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model_name)
# SOAP prompts and embeddings
soap_prompts = {
    "subjective": "Personal reports, symptoms described by patients, or personal health concerns. Details reflecting individual symptoms or health descriptions.",
    "objective": "Observable facts, clinical findings, professional observations, specific medical specialties, and diagnoses.",
    "assessment": "Clinical assessments, expertise-based opinions on conditions, and significance of medical interventions. Focused on medical evaluations or patient condition summaries.",
    "plan": "Future steps, recommendations for treatment, follow-up instructions, and healthcare management plans."
}
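# Pre-compute one embedding per SOAP section; each sentence is later assigned
# to the section whose prompt embedding it is most similar to (zero-shot
# classification via cosine similarity)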
soap_embeddings = {section: embedder.encode(prompt, convert_to_tensor=True) for section, prompt in soap_prompts.items()}
# Convert MP4 to MP3
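# Note: pydub delegates MP4 decoding and MP3 encoding to ffmpeg, so an ffmpeg
# binary must be available on PATH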
def convert_mp4_to_mp3(mp4_path, mp3_path):
    try:
        audio = AudioSegment.from_file(mp4_path, format="mp4")
        audio.export(mp3_path, format="mp3")
    except Exception as e:
        raise RuntimeError(f"Error converting MP4 to MP3: {e}")
# Transcribe audio
def transcribe_audio(audio_path):
    try:
        if not os.path.exists(audio_path):
            raise FileNotFoundError(f"Audio file not found: {audio_path}")
        # ffmpeg_read expects raw bytes rather than a path, so read the file first
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()
        audio_array = ffmpeg_read(audio_bytes, whisper_pipeline.feature_extractor.sampling_rate)
        # Ensure the decoded audio is a float32 numpy array
        if not isinstance(audio_array, np.ndarray):
            raise TypeError("Audio data should be a numpy array.")
        audio_array = audio_array.astype(np.float32)
        # Create input dictionary for Whisper
        inputs = {
            "array": audio_array,
            "sampling_rate": whisper_pipeline.feature_extractor.sampling_rate,
        }
        # Perform transcription
        result = whisper_pipeline(inputs, batch_size=BATCH_SIZE, return_timestamps=False)
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"
# Classify the sentence to the correct SOAP section
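# Note: sentences are classified independently of their neighbours, so
# conversational context is not taken into account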
def classify_sentence(sentence):
    # Embed once; .item() turns each 1x1 similarity tensor into a plain float
    sentence_embedding = embedder.encode(sentence, convert_to_tensor=True)
    similarities = {section: util.pytorch_cos_sim(sentence_embedding, soap_embeddings[section]).item() for section in soap_prompts}
    return max(similarities, key=similarities.get)
# Summarize the section if it's too long
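# Caveat: the 50-word threshold and target lengths below are computed in words,
# while generate() interprets max_length/min_length as token counts; word count
# is used here as a rough proxy for tokens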
def summarize_section(section_text):
    if len(section_text.split()) < 50:
        return section_text
    target_length = int(len(section_text.split()) * 0.50)
    inputs = tokenizer.encode(section_text, return_tensors="pt", truncation=True, max_length=1024)
    summary_ids = summarizer_model.generate(
        inputs,
        max_length=target_length,
        min_length=int(target_length * 0.45),
        length_penalty=1.0,
        num_beams=4
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)
# Analyze the SOAP content and divide into sections
def soap_analysis(text):
    doc = nlp(text)
    soap_note = {section: "" for section in soap_prompts.keys()}
    for sentence in doc.sents:
        section = classify_sentence(sentence.text)
        soap_note[section] += sentence.text + " "
    # Summarize each section of the SOAP note
    for section in soap_note:
        soap_note[section] = summarize_section(soap_note[section].strip())
    return format_soap_output(soap_note)
# Format the SOAP note output
def format_soap_output(soap_note):
    return (
        f"Subjective:\n{soap_note['subjective']}\n\n"
        f"Objective:\n{soap_note['objective']}\n\n"
        f"Assessment:\n{soap_note['assessment']}\n\n"
        f"Plan:\n{soap_note['plan']}\n"
    )
# Process file function for audio/video to SOAP
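# gr.File hands the function a file object whose .name attribute holds the path
# of the uploaded temp file (assumes gradio's default file-object upload type)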
def process_file(file, user_prompt):
    # user_prompt is currently unused; the UI keeps the field for a future template step
    # Determine file type and convert if necessary
    if file.name.endswith(".mp4"):
        temp_mp3_path = tempfile.NamedTemporaryFile(suffix=".mp3", delete=False).name
        try:
            convert_mp4_to_mp3(file.name, temp_mp3_path)
            audio_path = temp_mp3_path
        except Exception as e:
            return f"Error during MP4 to MP3 conversion: {e}"
    else:
        audio_path = file.name
    # Transcribe audio
    transcription = transcribe_audio(audio_path)
    print("Transcribed Text: ", transcription)
    # Perform SOAP analysis
    soap_note = soap_analysis(transcription)
    print("SOAP Notes: ", soap_note)
    # Clean up the temporary MP3, if one was created
    if file.name.endswith(".mp4"):
        os.remove(temp_mp3_path)
    return soap_note
# Process text function for text input to SOAP
def process_text(text, user_prompt):
    # user_prompt is currently unused (see process_file)
    soap_note = soap_analysis(text)
    print(soap_note)
    return soap_note
# Gradio interface
def launch_gradio():
    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.Markdown("# Enhanced Video to SOAP Note Generator")
        with gr.Tab("Audio/Video File to SOAP"):
            gr.Interface(
                fn=process_file,
                inputs=[gr.File(label="Upload Audio/Video File"), gr.Textbox(label="Enter Prompt for Template", placeholder="Enter a detailed prompt...", lines=6)],
                outputs=[
                    gr.Textbox(label="SOAP Note"),
                ],
            )
        with gr.Tab("Text Input to SOAP"):
            gr.Interface(
                fn=process_text,
                inputs=[gr.Textbox(label="Enter Text", placeholder="Enter medical notes...", lines=6), gr.Textbox(label="Enter Prompt for Template", placeholder="Enter a detailed prompt...", lines=6)],
                outputs=[
                    gr.Textbox(label="SOAP Note"),
                ],
            )
    demo.launch(share=True, debug=True)

# Run the Gradio app
if __name__ == "__main__":
    launch_gradio()
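# A minimal sketch of exercising the text pipeline directly, assuming the
# models above have downloaded (hypothetical sample input):
#
#   sample = (
#       "The patient reports persistent headaches for two weeks. "
#       "Blood pressure measured at 150/95. "
#       "Findings are consistent with hypertension. "
#       "Start lisinopril and follow up in one month."
#   )
#   print(process_text(sample, ""))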