import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Model name
model_name = "OpenGVLab/InternVideo2_5_Chat_8B"

# Load tokenizer
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

# Detect device
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load model (fp16 on GPU, fp32 on CPU)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    trust_remote_code=True,
    torch_dtype=torch.float16 if device == "cuda" else torch.float32,
    device_map="auto" if device == "cuda" else None,
)

# With device_map="auto", accelerate already places the model on the GPU(s);
# only move it manually when running on CPU.
if device == "cpu":
    model.to(device)

model.eval()

# Define inference function
def chat_with_model(prompt):
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        output = model.generate(**inputs, max_new_tokens=200)
    return tokenizer.decode(output[0], skip_special_tokens=True)

# Create Gradio UI
demo = gr.Interface(
    fn=chat_with_model,
    inputs=gr.Textbox(placeholder="Type your prompt here..."),
    outputs="text",
    title="InternVideo2.5 Chatbot",
    description="A chatbot powered by InternVideo2_5_Chat_8B.",
)

# Run the Gradio app
if __name__ == "__main__":
    demo.launch()
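
# --- Optional usage sketch (not part of the app itself) ---
# Once the app above is running, it can also be queried programmatically with
# gradio_client. This is a hedged example: the local URL and the "/predict"
# api_name are Gradio's defaults for a single-function gr.Interface, not values
# defined in this script.
#
# from gradio_client import Client
#
# client = Client("http://127.0.0.1:7860")  # default local URL printed by demo.launch()
# reply = client.predict("Hello, what can you do?", api_name="/predict")
# print(reply)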