Spaces:

ukumar557
/

Chatbot

Runtime error

Chatbot / app.py

Update app.py

ac21e0a verified 7 months ago

1.21 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	# Check if CUDA is available
	device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
	if device.type == "cpu":
	print("Warning: CUDA is not available. Running on CPU, which may be slow.")

	# Load the tokenizer and model directly
	model_name = "ruslanmv/ai-medical-model-32bit"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(model_name)



	# Function to ask medical questions
	def ask_medical_question(question):
	prompt = f"<\|start_header_id\|>system<\|end_header_id\|> You are a Medical AI chatbot assistant. <\|eot_id\|><\|start_header_id\|>User: <\|end_header_id\|>This is the question: {question}<\|eot_id\|>"

	inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
	outputs = model.generate(
	**inputs,
	max_new_tokens=256,
	temperature=0.7,
	do_sample=True,
	top_p=0.95,
	top_k=50,
	)
	response = tokenizer.decode(outputs[0], skip_special_tokens=True)
	return response


	# Set up Gradio interface
	iface = gr.Interface(fn=ask_medical_question, inputs="text", outputs="text")
	iface.launch()