import os
import logging

import gradio as gr
from transformers import AutoModel, AutoTokenizer
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# ------------------------------
# 🔹 Load Bioformer-8L Model
# ------------------------------
# Bioformer-8L is an encoder-only (BERT-style) model, so it is loaded with
# AutoModel rather than AutoModelForCausalLM. It is not used for generation;
# it is loaded here for the knowledge-base retrieval/embedding role described
# in the system prompt below.
BIOFORMER_MODEL = "bioformers/bioformer-8L"
bioformer_tokenizer = AutoTokenizer.from_pretrained(BIOFORMER_MODEL)
bioformer_model = AutoModel.from_pretrained(BIOFORMER_MODEL)

# ------------------------------
# 🔹 Load DeepSeek-R1-Distill-Qwen-7B-GGUF Model
# ------------------------------
DEEPSEEK_REPO = "lmstudio-community/DeepSeek-R1-Distill-Qwen-7B-GGUF"
DEEPSEEK_FILENAME = "DeepSeek-R1-Distill-Qwen-7B-Q4_0.gguf"

model_path = hf_hub_download(repo_id=DEEPSEEK_REPO, filename=DEEPSEEK_FILENAME)

llm = Llama(
    model_path=model_path,
    n_ctx=4096,
    n_gpu_layers=0,   # CPU inference
    logits_all=True,  # kept from the original config; not required for plain generation
    n_batch=256,
)

logger.info("Models loaded successfully.")

# ------------------------------
# 🔹 Unified Medical Prompt
# ------------------------------
UNIFIED_MEDICAL_PROMPT = """
You are an advanced Medical AI Assistant capable of providing thorough,
comprehensive answers for a wide range of medical specialties: General Practice,
Radiology, Cardiology, Neurology, Psychiatry, Pediatrics, Endocrinology,
Oncology, and more.

You can:
1) Analyze images if provided (Radiology).
2) Retrieve relevant documents from a knowledge base (Vector Store).
3) Provide scientific, evidence-based explanations and references when possible.

Always strive to provide a detailed, helpful, and empathetic response.
"""

# ------------------------------
# 🔹 Chat Function
# ------------------------------
def chat_with_agent(user_query, image_file=None):
    """Stream a response from the DeepSeek model for a medical query."""
    # The GGUF model is text-only, so an uploaded image cannot be analyzed
    # directly; it is only acknowledged in the prompt.
    image_note = ""
    if image_file:
        image_note = (
            f'\nNote: The patient attached a medical image '
            f'("{os.path.basename(image_file)}").'
        )

    combined_context = f"""
{UNIFIED_MEDICAL_PROMPT}

Patient Query: "{user_query}"{image_note}

Your Response:
"""

    # Stream tokens from the DeepSeek-R1-Distill model, yielding the
    # accumulated text so Gradio updates the output box incrementally.
    response_accumulator = ""
    for token in llm(
        prompt=combined_context,
        max_tokens=1024,
        temperature=0.7,
        top_p=0.9,
        stream=True,
    ):
        partial_text = token["choices"][0]["text"]
        response_accumulator += partial_text
        yield response_accumulator

# ------------------------------
# 🔹 Gradio Interface
# ------------------------------
with gr.Blocks(title="🏥 DeepSeek-Med AI Assistant") as demo:
    gr.Markdown("""
    # 🏥 DeepSeek-Med AI Assistant
    _Your intelligent medical assistant powered by advanced AI._
    """)
    with gr.Row():
        user_input = gr.Textbox(
            label="💬 Ask a medical question",
            placeholder="Type your question here...",
        )
        image_file = gr.Image(label="📷 Upload Medical Image (Optional)", type="filepath")
    submit_btn = gr.Button("🚀 Submit", variant="primary")
    output_text = gr.Textbox(label="📝 Assistant's Response", interactive=False, lines=25)

    submit_btn.click(fn=chat_with_agent, inputs=[user_input, image_file], outputs=output_text)

if __name__ == "__main__":
    demo.queue().launch(server_name="0.0.0.0", server_port=7860)
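# ------------------------------
# 🔹 Running Locally
# ------------------------------
# A minimal setup sketch (package names are the public PyPI names for the
# imports above; exact versions are not pinned by this script, and the
# filename "app.py" is an assumption):
#   pip install gradio transformers torch llama-cpp-python huggingface_hub
#   python app.py
# The UI is then served at http://localhost:7860. With n_gpu_layers=0 the
# 7B model runs entirely on CPU, so first-token latency can be significant.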