Update app.py
app.py CHANGED
@@ -7,7 +7,6 @@ from pylatexenc.latex2text import LatexNodes2Text
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
-    BitsAndBytesConfig,
     Qwen2VLForConditionalGeneration,
     AutoProcessor
 )
@@ -39,21 +38,16 @@ def load_ocr_model():
 
 @st.cache_resource(show_spinner=False)
 def load_llm_model():
-    # Load LLM model and tokenizer
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16
-    )
+    # Load LLM model and tokenizer for CPU-only execution. The BitsAndBytes config is removed.
     model_name = "deepseek-ai/deepseek-math-7b-instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Load model on CPU (since no CUDA is available)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-
-        device_map="auto"
+        device_map="cpu"
     )
-    tokenizer.pad_token = tokenizer.eos_token
     return model, tokenizer
 
 #############################
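For readers who want the changed loader outside the diff, a device-agnostic variant is sketched below. It is not part of this commit: the function name and the dtype choice are assumptions, and the commit itself pins everything to the CPU.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_llm_model_flexible():
    # Hypothetical variant, not from app.py: pick CUDA when it exists,
    # otherwise fall back to the CPU path this commit hard-codes.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_name = "deepseek-ai/deepseek-math-7b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token  # model ships without a pad token
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device,
        # Assumption: bfloat16 on GPU mirrors the removed bnb_4bit_compute_dtype;
        # float32 is the safe default on CPU.
        torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
    )
    return model, tokenizer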
@@ -85,7 +79,8 @@ def img_2_text(image, model_ocr, processor_ocr):
         padding=True,
         return_tensors="pt",
     )
-
+    # Move inputs to CPU, since that's our only device available
+    inputs = inputs.to("cpu")
 
     generated_ids = model_ocr.generate(**inputs, max_new_tokens=512)
     generated_ids_trimmed = [
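Hard-coding "cpu" is safe here only because the loaders above now place the models on the CPU. A more drift-proof alternative (a sketch, not in this commit) reads the device off the model itself:

# Sketch, not from app.py: keep inputs on whatever device the OCR model uses,
# so this line needn't change if the loader's device_map changes again.
inputs = inputs.to(model_ocr.device)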
@@ -97,17 +92,15 @@ def img_2_text(image, model_ocr, processor_ocr):
     return output_text[0].split('<|im_end|>')[0]
 
 def expression_solver(expression, model_llm, tokenizer_llm):
-    device = next(model_llm.parameters()).device
     prompt = f"""You are a helpful math assistant. Please analyze the problem carefully and provide a step-by-step solution.
     - If the problem is an equation, solve for the unknown variable(s).
     - If it is an expression, simplify it fully.
     - If it is a word problem, explain how you arrive at the result.
     - Output final value, either True or False in case of expressions where you have to verify, or the value of variables in expressions where you have to solve in a <ANS> </ANS> tag with no other text in it.
-
     Problem: {expression}
     Answer:
     """
-    inputs = tokenizer_llm(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer_llm(prompt, return_tensors="pt").to("cpu")
     outputs = model_llm.generate(
         **inputs,
         max_new_tokens=512,
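The hunk stops before the decode step, so how the <ANS> tag is consumed isn't shown. Given the convention the prompt establishes, extraction could look like this sketch; extract_answer is a hypothetical helper, not from app.py:

import re

def extract_answer(decoded_text):
    # Hypothetical helper: pull the final value out of the <ANS> ... </ANS>
    # tag the prompt asks the model to emit; None if the tag never appears.
    match = re.search(r"<ANS>(.*?)</ANS>", decoded_text, re.DOTALL)
    return match.group(1).strip() if match else None

# Example: extract_answer("... steps ... <ANS>x = 4</ANS>") returns "x = 4".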