import gradio as gr
from huggingface_hub import InferenceClient

# Extractive question-answering model hosted on the HF Inference API.
client = InferenceClient("thviet79/model-QA-medical")


def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Answer *message* with the medical QA model, using *system_message* as context.

    Args:
        message: The user's question.
        history: Chat history supplied by ``gr.ChatInterface`` (unused —
            extractive QA answers each question independently).
        system_message: Passage the model extracts the answer from.
        max_tokens, temperature, top_p: Generation knobs exposed in the UI.
            NOTE(review): extractive QA does not generate text, so these are
            currently ignored; kept so the ChatInterface signature matches
            its ``additional_inputs``.

    Returns:
        The answer span extracted from the context.
    """
    # BUG FIX: question_answering takes `question` and `context` keyword
    # arguments — passing a single `inputs={...}` dict (the raw HTTP payload
    # shape) raises a TypeError on the client method.
    response = client.question_answering(
        question=message,
        context=system_message,
    )
    # Newer huggingface_hub versions return a QuestionAnsweringOutputElement
    # dataclass (attribute access); older versions returned a plain dict.
    # Support both so the app works across versions.
    if hasattr(response, "answer"):
        return response.answer
    return response["answer"]


demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="Context for medical questions.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()