"""Gradio Space that serves AIDC-AI/Marco-o1 with a lightweight finetune overlay.

Loads the base causal-LM and tokenizer, overlays extra weights from a
safetensors checkpoint (non-strict, so partial/adapter-style state dicts are
accepted), and exposes a single text-in/text-out Gradio interface.
"""

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import safetensors.torch
import spaces

# Load base model and tokenizer.
base_model_name = "AIDC-AI/Marco-o1"
tokenizer = AutoTokenizer.from_pretrained(base_model_name)
model = AutoModelForCausalLM.from_pretrained(base_model_name)

# Overlay the lightweight finetuned weights on top of the base model.
# NOTE(review): safetensors.torch.load_file expects a *local file path* to a
# .safetensors file, but this value looks like a Hugging Face repo id. As
# written this line will raise FileNotFoundError at startup — the file likely
# needs to be fetched first (e.g. via huggingface_hub.hf_hub_download) and the
# resulting local path passed here. TODO: confirm and fix the path source.
safetensor_path = "MegaTronX/Odyssey-SelectolaxQLoRA"
# strict=False: the checkpoint may cover only a subset of the model's
# parameters (adapter/QLoRA-style), so missing keys are tolerated.
model.load_state_dict(safetensors.torch.load_file(safetensor_path), strict=False)


@spaces.GPU
def predict(text):
    """Generate a model continuation for *text*.

    Args:
        text: The user prompt as a plain string.

    Returns:
        The decoded generation (special tokens stripped) as a string.
    """
    # Use the tokenizer's __call__ so we get both input_ids and
    # attention_mask; generate() warns (and can mispredict with padding)
    # when the attention mask is omitted.
    inputs = tokenizer(text, return_tensors="pt")
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
    )
    # outputs[0]: the single returned sequence (default num_return_sequences=1).
    prediction = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return prediction


demo = gr.Interface(fn=predict, inputs="text", outputs="text")
demo.launch()