import os
import tempfile
from subprocess import Popen, PIPE

import torch
import gradio as gr
from pydub import AudioSegment
from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM
from transformers.pipelines.audio_utils import ffmpeg_read
from sentence_transformers import SentenceTransformer, util
import spacy

# Constants
MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000  # NOTE(review): not referenced in the visible code
# Device index 0 when CUDA is available, otherwise the CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Whisper pipeline
whisper_pipeline = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)

# NLP model and other helpers
nlp = spacy.load("en_core_web_sm")
embedder = SentenceTransformer("all-MiniLM-L6-v2")

# Summarization model
summarizer_model_name = "Mahalingam/DistilBart-Med-Summary"
tokenizer = AutoTokenizer.from_pretrained(summarizer_model_name)
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(summarizer_model_name)
summarizer = pipeline("summarization", model=summarizer_model, tokenizer=tokenizer)

# SOAP prompts and embeddings
# Each SOAP section is described by a short prompt; sentences are assigned to
# the section whose prompt embedding they are most similar to.
soap_prompts = {
    "subjective": "Personal reports, symptoms described by patients, or personal health concerns. Details reflecting individual symptoms or health descriptions.",
    "objective": "Observable facts, clinical findings, professional observations, specific medical specialties, and diagnoses.",
    "assessment": "Clinical assessments, expertise-based opinions on conditions, and significance of medical interventions. Focused on medical evaluations or patient condition summaries.",
    "plan": "Future steps, recommendations for treatment, follow-up instructions, and healthcare management plans.",
}
soap_embeddings = {
    section: embedder.encode(prompt, convert_to_tensor=True)
    for section, prompt in soap_prompts.items()
}


# Convert MP4 to MP3
def convert_mp4_to_mp3(mp4_path, mp3_path):
    """Extract the audio track of an MP4 file and write it out as MP3.

    Args:
        mp4_path: Path of the MP4 file to read.
        mp3_path: Path the MP3 output is written to.

    Raises:
        RuntimeError: If reading or exporting fails; the underlying
            exception is attached as the cause.
    """
    try:
        audio = AudioSegment.from_file(mp4_path, format="mp4")
        # export() hands back the open output file; close it so the handle is
        # not left dangling (an open handle can block later deletion).
        audio.export(mp3_path, format="mp3").close()
    except Exception as e:
        raise RuntimeError(f"Error converting MP4 to MP3: {e}") from e


# Transcribe audio
def transcribe_audio(audio_path):
    """Transcribe an audio file with the Whisper pipeline.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The transcribed text, or a string starting with
        "Error during transcription:" if any step fails (errors are reported
        in-band rather than raised).
    """
    try:
        sampling_rate = whisper_pipeline.feature_extractor.sampling_rate
        # ffmpeg_read decodes the raw *bytes* of an audio file, not a path,
        # so the file content has to be loaded first.
        with open(audio_path, "rb") as audio_file:
            payload = audio_file.read()
        waveform = ffmpeg_read(payload, sampling_rate)
        inputs = {"array": waveform, "sampling_rate": sampling_rate}
        result = whisper_pipeline(inputs, batch_size=BATCH_SIZE, return_timestamps=False)
        return result["text"]
    except Exception as e:
        return f"Error during transcription: {e}"


# Classify the sentence to the correct SOAP section
def classify_sentence(sentence):
    """Return the SOAP section whose prompt is most similar to *sentence*.

    Args:
        sentence: Sentence text to classify.

    Returns:
        One of the keys of ``soap_prompts``. On ties the section that comes
        first in ``soap_prompts`` wins.
    """
    # Encode once (not once per section) and as a tensor, so it matches the
    # type of the precomputed prompt embeddings.
    sentence_embedding = embedder.encode(sentence, convert_to_tensor=True)
    similarities = {
        section: util.pytorch_cos_sim(sentence_embedding, section_embedding).item()
        for section, section_embedding in soap_embeddings.items()
    }
    return max(similarities, key=similarities.get)


# Summarize the section if it's too long
def summarize_section(section_text):
    """Summarize a section's text when it has 50 or more words.

    Args:
        section_text: Text collected for one SOAP section.

    Returns:
        ``section_text`` unchanged when it has fewer than 50
        whitespace-separated words; otherwise a generated summary.
    """
    word_count = len(section_text.split())
    if word_count < 50:
        return section_text

    # Aim for roughly half the original length. NOTE(review): the target is
    # derived from a word count but passed to generate() as a length limit,
    # which is presumably measured in tokens - confirm this is intended.
    target_length = int(word_count * 0.50)
    inputs = tokenizer.encode(
        section_text, return_tensors="pt", truncation=True, max_length=1024
    )
    summary_ids = summarizer_model.generate(
        inputs,
        max_length=target_length,
        min_length=int(target_length * 0.45),
        length_penalty=1.0,
        num_beams=4,
    )
    return tokenizer.decode(summary_ids[0], skip_special_tokens=True)


# Analyze the SOAP content and divide into sections
def soap_analysis(text):
    """Split *text* into sentences, sort them into SOAP sections and format.

    Args:
        text: Free text (e.g. a transcription) to analyze.

    Returns:
        The formatted SOAP note produced by ``format_soap_output``.
    """
    doc = nlp(text)
    sentences_by_section = {section: [] for section in soap_prompts}
    for sentence in doc.sents:
        section = classify_sentence(sentence.text)
        sentences_by_section[section].append(sentence.text)

    # Summarize each section of the SOAP note
    soap_note = {
        section: summarize_section(" ".join(sentences).strip())
        for section, sentences in sentences_by_section.items()
    }
    return format_soap_output(soap_note)


# Format the SOAP note output
def format_soap_output(soap_note):
    """Render the four SOAP sections as one labelled text block.

    Args:
        soap_note: Mapping with the keys "subjective", "objective",
            "assessment" and "plan".

    Returns:
        A string with one labelled paragraph per section.
    """
    return (
        f"Subjective:\n{soap_note['subjective']}\n\n"
        f"Objective:\n{soap_note['objective']}\n\n"
        f"Assessment:\n{soap_note['assessment']}\n\n"
        f"Plan:\n{soap_note['plan']}\n"
    )


# Process file function for audio/video to SOAP
def process_file(file, user_prompt):
    """Turn an uploaded audio/video file into a formatted SOAP note.

    Args:
        file: The upload handed over by Gradio; either an object with a
            ``name`` attribute holding the path, or a plain path string.
        user_prompt: Prompt for the (currently disabled) template step;
            unused at the moment.

    Returns:
        The SOAP note text, or an error message string. Always a single
        string, because the interface declares exactly one output.
    """
    if file is None:
        return "Error: no file was uploaded."

    source_path = getattr(file, "name", file)
    # Determine file type and convert if necessary (extension check is
    # case-insensitive so ".MP4" is converted too).
    is_mp4 = str(source_path).lower().endswith(".mp4")
    temp_mp3_path = None
    try:
        if is_mp4:
            # Only the name is needed; close the handle immediately so the
            # converter can write to the path.
            with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_mp3:
                temp_mp3_path = temp_mp3.name
            try:
                convert_mp4_to_mp3(source_path, temp_mp3_path)
            except Exception as e:
                return f"Error during MP4 to MP3 conversion: {e}"
            audio_path = temp_mp3_path
        else:
            audio_path = source_path

        # Transcribe audio
        transcription = transcribe_audio(audio_path)
        print("Transcribed Text: ", transcription)
        # transcribe_audio reports failures in-band; do not run SOAP analysis
        # on an error message as if it were patient text.
        if transcription.startswith("Error during transcription:"):
            return transcription

        # Perform SOAP analysis
        soap_note = soap_analysis(transcription)
        print("SOAP Notes: ", soap_note)

        # # Generate template and JSON using LLaMA
        # template_output = llama_query(user_prompt, soap_note)
        # print("Template: ", template_output)
        # json_output = llama_convert_to_json(template_output)

        return soap_note  # , template_output, json_output
    finally:
        # Clean up temporary files on every exit path, including failures.
        if temp_mp3_path is not None:
            try:
                os.remove(temp_mp3_path)
            except OSError:
                # Best-effort cleanup; a leftover temp file must not mask the
                # real result or error.
                pass


# Process text function for text input to SOAP
def process_text(text, user_prompt):
    """Turn free text into a formatted SOAP note.

    Args:
        text: Medical notes entered by the user.
        user_prompt: Prompt for the (currently disabled) template step;
            unused at the moment.

    Returns:
        The SOAP note text.
    """
    soap_note = soap_analysis(text)
    print(soap_note)
    # template_output = llama_query(user_prompt, soap_note)
    # print(template_output)
    # json_output = llama_convert_to_json(template_output)
    return soap_note  # , template_output, json_output


# # Llama query function
# def llama_query(user_prompt, soap_note, model="llama3.2"):
#     combined_prompt = f"User Instructions:\n{user_prompt}\n\nContext:\n{soap_note}"
#     try:
#         process = Popen(['ollama', 'run', model], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True, encoding='utf-8')
#         stdout, stderr = process.communicate(input=combined_prompt)
#         if process.returncode != 0:
#             return f"Error: {stderr.strip()}"
#         return stdout.strip()
#     except Exception as e:
#         return f"Unexpected error: {str(e)}"

# # Convert the response to JSON format
# def llama_convert_to_json(template_output, model="llama3.2"):
#     json_prompt = f"Convert the following template into a structured JSON format:\n\n{template_output}"
#     try:
#         process = Popen(['ollama', 'run', model], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True, encoding='utf-8')
#         stdout, stderr = process.communicate(input=json_prompt)
#         if process.returncode != 0:
#             return f"Error: {stderr.strip()}"
#         return stdout.strip()  # Assuming the model outputs a valid JSON string
#     except Exception as e:
#         return f"Unexpected error: {str(e)}"


# Gradio interface
def launch_gradio():
    """Build the two-tab Gradio UI (file upload and text input) and launch it."""
    with gr.Blocks(theme=gr.themes.Default()) as demo:
        gr.Markdown("# Enhanced Video to SOAP Note Generator")

        with gr.Tab("Audio/Video File to SOAP"):
            gr.Interface(
                fn=process_file,
                inputs=[
                    gr.File(label="Upload Audio/Video File"),
                    gr.Textbox(
                        label="Enter Prompt for Template",
                        placeholder="Enter a detailed prompt...",
                        lines=6,
                    ),
                ],
                outputs=[
                    gr.Textbox(label="SOAP Note"),
                    # gr.Textbox(label="Generated Template from LLaMA"),
                    # gr.Textbox(label="JSON Output")
                ],
            )

        with gr.Tab("Text Input to SOAP"):
            gr.Interface(
                fn=process_text,
                inputs=[
                    gr.Textbox(
                        label="Enter Text",
                        placeholder="Enter medical notes...",
                        lines=6,
                    ),
                    gr.Textbox(
                        label="Enter Prompt for Template",
                        placeholder="Enter a detailed prompt...",
                        lines=6,
                    ),
                ],
                outputs=[
                    gr.Textbox(label="SOAP Note"),
                    # gr.Textbox(label="Generated Template from LLaMA"),
                    # gr.Textbox(label="JSON Output")
                ],
            )

    demo.launch(share=True, debug=True)


# Run the Gradio app
if __name__ == "__main__":
    launch_gradio()