Spaces:
Runtime error
Runtime error
File size: 1,864 Bytes
ed57fa1 fd165fd ed57fa1 f0f445c fd165fd f0f445c fd165fd df48fb9 ed57fa1 fd165fd ed57fa1 df48fb9 ed57fa1 df48fb9 ed57fa1 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
import gradio as gr
from transformers import pipeline
# Load the model and tokenizer
# def load_model():
# # Load the NuminaMath-72B-CoT model
# pipe = pipeline(
# "text-generation",
# model="AI-MO/NuminaMath-72B-CoT",
# torch_dtype="auto",
# device_map="auto" # Automatically map to available GPU/CPU
# )
# return pipe
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
def load_model(model_id: str = "AI-MO/NuminaMath-7B-CoT"):
    """Build a text-generation pipeline for a causal language model.

    Loads the tokenizer and the 8-bit quantized weights for ``model_id`` and
    wraps both in a ``transformers`` pipeline.

    Args:
        model_id: Hugging Face Hub repository id (or local path) of the
            checkpoint to load. Defaults to the 7B NuminaMath CoT model; a
            different checkpoint such as "AI-MO/NuminaMath-72B-CoT" can be
            passed instead.

    Returns:
        A "text-generation" pipeline bound to the loaded model and tokenizer.
    """
    # Imported locally so this function is self-contained with respect to the
    # file-level import list.
    from transformers import BitsAndBytesConfig

    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map="auto",  # automatically map the model onto available devices
        # Passing load_in_8bit=True straight to from_pretrained is deprecated;
        # quantization settings belong in a BitsAndBytesConfig.
        quantization_config=BitsAndBytesConfig(load_in_8bit=True),
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)
# Initialize the pipeline once at module import; solve_math_question() reuses
# this shared instance for every request instead of reloading the model.
model_pipeline = load_model()
# Define the function to process inputs
def solve_math_question(prompt):
    """Run the shared model pipeline on ``prompt`` and return its text output.

    Generation runs with sampling disabled and is capped at 300 newly
    generated tokens.

    Args:
        prompt: The question text handed to the model.

    Returns:
        The ``"generated_text"`` field of the first pipeline result.
    """
    generation_kwargs = {"max_new_tokens": 300, "do_sample": False}
    results = model_pipeline(prompt, **generation_kwargs)
    first_result = results[0]
    return first_result["generated_text"]
# Define the Gradio interface
# NOTE(review): the original indentation was lost; the nesting below (both
# text boxes inside the row, button underneath) is reconstructed -- confirm.
with gr.Blocks() as app:
    # The title names the 7B checkpoint, which is the one load_model() loads.
    gr.Markdown("# NuminaMath-7B-CoT Math Question Solver")
    gr.Markdown(
        "Ask a math-related question, and the model will attempt to solve it with reasoning!"
    )

    with gr.Row():
        question = gr.Textbox(
            label="Your Math Question",
            placeholder="what is 2+2?",
        )
        output = gr.Textbox(label="Model Output")

    submit_button = gr.Button("Solve")

    # Send the question text to the model and show the result in the output box.
    submit_button.click(solve_math_question, inputs=question, outputs=output)

# Launch the app
app.launch()