import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
import spaces
import os
# Detect whether we are running on a Hugging Face Space (and on ZeroGPU hardware)
IS_SPACES_ZERO = os.environ.get("SPACES_ZERO_GPU", "0") == "1"
IS_SPACE = os.environ.get("SPACE_ID", None) is not None
device = "cuda" if torch.cuda.is_available() else "cpu"
#device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
#dtype = torch.float16
LOW_MEMORY = os.getenv("LOW_MEMORY", "0") == "1"
print(f"Using device: {device}")
#print(f"Using dtype: {dtype}")
print(f"low memory: {LOW_MEMORY}")
model_name = "ruslanmv/Medical-Llama3-8B"
# Load the model onto the selected device; the tokenizer itself is device-agnostic
model = AutoModelForCausalLM.from_pretrained(model_name).to(device)
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
# Llama 3 ships without a pad token; reuse the EOS token for padding
tokenizer.pad_token = tokenizer.eos_token
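# BitsAndBytesConfig is imported above but never used. A hedged sketch of how
# the LOW_MEMORY flag could drive 4-bit loading instead (assumes the
# bitsandbytes package is installed; this is not part of the app's current
# behavior):
#
# if LOW_MEMORY:
#     quant_config = BitsAndBytesConfig(
#         load_in_4bit=True,
#         bnb_4bit_compute_dtype=torch.float16,
#     )
#     model = AutoModelForCausalLM.from_pretrained(
#         model_name, quantization_config=quant_config, device_map="auto"
#     )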
@spaces.GPU
def askme(symptoms, question):
    sys_message = '''\
    You are an AI Medical Assistant trained on a vast dataset of health information. Please be thorough and
    provide an informative answer. If you don't know the answer to a specific medical inquiry, advise seeking professional help.
    '''
    content = symptoms + " " + question
    messages = [{"role": "system", "content": sys_message}, {"role": "user", "content": content}]
    # Build a single prompt string using the model's chat template
    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
    outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
    # Decode the generated tokens, dropping special tokens
    response_text = tokenizer.batch_decode(outputs, skip_special_tokens=True)[0]
    # Keep only the assistant's reply, which follows the "assistant" header text
    assistant_response = response_text.split("assistant")[1].strip()
    return assistant_response
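# The commented-out draft above hinted at batching several prompts at once.
# A sketch of that variant (hypothetical helper, not wired into the Gradio
# app; assumes each entry in `prompts` is already a chat-templated string):
#
# def askme_batch(prompts):
#     inputs = tokenizer(prompts, return_tensors="pt", padding=True, truncation=True).to(device)
#     outputs = model.generate(**inputs, max_new_tokens=200, use_cache=True)
#     texts = tokenizer.batch_decode(outputs, skip_special_tokens=True)
#     return [t.split("assistant")[1].strip() for t in texts]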
# Example usage
symptoms = '''\
I'm a 35-year-old male and for the past few months, I've been experiencing fatigue,
increased sensitivity to cold, and dry, itchy skin.
'''
question = '''\
Could these symptoms be related to hypothyroidism?
If so, what steps should I take to get a proper diagnosis and discuss treatment options?
'''
examples = [
[symptoms, question]
]
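# Quick local sanity check (hypothetical; not part of the Space's UI flow,
# and generation will be slow on CPU):
# print(askme(symptoms, question))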
iface = gr.Interface(
fn=askme,
inputs=["text", "text"],
outputs="text",
examples=examples,
title="Medical AI Chatbot",
description="Ask me a medical question!"
)
iface.launch()