import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the medical chat model and its tokenizer from the Hugging Face Hub.
model_name = "ruslanmv/ai-medical-model-32bit"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)

# Move the model to the GPU when one is available; fall back to CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)
model.eval()
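
# Lower-memory alternative (a sketch, assuming the optional `accelerate`
# package is installed): load the weights in half precision and let
# transformers place them across available devices automatically, in which
# case the explicit .to(device) call above is unnecessary:
#
#   model = AutoModelForCausalLM.from_pretrained(
#       model_name, torch_dtype=torch.float16, device_map="auto"
#   )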

def ask_medical_question(question):
    # Build the prompt with the tokenizer's chat template instead of
    # hand-assembling special tokens such as <|start_header_id|> and
    # <|eot_id|>; add_generation_prompt appends the assistant header so the
    # model answers rather than continuing the user turn. (This assumes the
    # tokenizer ships a chat template, as Llama-3-based models do.)
    messages = [
        {"role": "system", "content": "You are a Medical AI chatbot assistant."},
        {"role": "user", "content": f"This is the question: {question}"},
    ]
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(device)

    with torch.no_grad():
        outputs = model.generate(
            input_ids,
            max_new_tokens=256,  # cap on the length of the generated answer
            do_sample=True,      # sample instead of greedy decoding
            temperature=0.7,     # soften the distribution for varied answers
            top_p=0.95,          # nucleus sampling cutoff
            top_k=50,            # sample only from the 50 likeliest tokens
            pad_token_id=tokenizer.eos_token_id,  # avoid the missing-pad-token warning
        )

    # Decode only the newly generated tokens so the prompt is not echoed back.
    response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens=True)
    return response
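
# Quick sanity check outside the web UI (the question is only illustrative):
#
#   print(ask_medical_question("What are the symptoms of type 2 diabetes?"))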

# Expose the function through a minimal Gradio text-in/text-out interface.
iface = gr.Interface(fn=ask_medical_question, inputs="text", outputs="text")
iface.launch()
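
# Once the app is running (by default at http://127.0.0.1:7860), it can also
# be queried over HTTP. A minimal sketch, assuming the optional gradio_client
# package is installed; "/predict" is Gradio's default endpoint name for a
# single-function Interface:
#
#   from gradio_client import Client
#
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("What causes iron-deficiency anemia?", api_name="/predict"))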