majorSeaweed committed
Commit 94ba169 · verified · 1 Parent(s): 7d5c11a

Update app.py

Files changed (1): app.py (+8 -15)
app.py CHANGED
@@ -7,7 +7,6 @@ from pylatexenc.latex2text import LatexNodes2Text
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
-    BitsAndBytesConfig,
     Qwen2VLForConditionalGeneration,
     AutoProcessor
 )
@@ -39,21 +38,16 @@ def load_ocr_model():
 
 @st.cache_resource(show_spinner=False)
 def load_llm_model():
-    # Load LLM model and tokenizer with BitsAndBytes 4-bit quantization configuration
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16
-    )
+    # Load LLM model and tokenizer for CPU-only execution. The BitsAndBytes config is removed.
     model_name = "deepseek-ai/deepseek-math-7b-instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Load model on CPU (since no CUDA is available)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-        quantization_config=bnb_config,
-        device_map="auto"
+        device_map="cpu"
     )
-    tokenizer.pad_token = tokenizer.eos_token
     return model, tokenizer
 
 #############################
@@ -85,7 +79,8 @@ def img_2_text(image, model_ocr, processor_ocr):
         padding=True,
         return_tensors="pt",
     )
-    inputs = inputs.to(model_ocr.device)
+    # Move inputs to CPU, since that's our only device available
+    inputs = inputs.to("cpu")
 
     generated_ids = model_ocr.generate(**inputs, max_new_tokens=512)
     generated_ids_trimmed = [
@@ -97,17 +92,15 @@ def img_2_text(image, model_ocr, processor_ocr):
     return output_text[0].split('<|im_end|>')[0]
 
 def expression_solver(expression, model_llm, tokenizer_llm):
-    device = next(model_llm.parameters()).device
     prompt = f"""You are a helpful math assistant. Please analyze the problem carefully and provide a step-by-step solution.
 - If the problem is an equation, solve for the unknown variable(s).
 - If it is an expression, simplify it fully.
 - If it is a word problem, explain how you arrive at the result.
 - Output final value, either True or False in case of expressions where you have to verify, or the value of variables in expressions where you have to solve in a <ANS> </ANS> tag with no other text in it.
-
 Problem: {expression}
 Answer:
 """
-    inputs = tokenizer_llm(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer_llm(prompt, return_tensors="pt").to("cpu")
     outputs = model_llm.generate(
         **inputs,
         max_new_tokens=512,
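Note: the commit trades the parent's 4-bit NF4 quantization for a plain full-precision CPU load. Below is a minimal sketch (not part of the commit) of a device-aware loader that combines both versions of load_llm_model, reusing the BitsAndBytes settings from parent commit 7d5c11a when CUDA is available and falling back to the CPU path this commit introduces; the name load_llm_model_device_aware is hypothetical:

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

def load_llm_model_device_aware(model_name="deepseek-ai/deepseek-math-7b-instruct"):
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token
    if torch.cuda.is_available():
        # GPU path: the 4-bit NF4 setup removed by this commit
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_use_double_quant=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
        )
        model = AutoModelForCausalLM.from_pretrained(
            model_name,
            quantization_config=bnb_config,
            device_map="auto",
        )
    else:
        # CPU path: the plain load this commit introduces
        model = AutoModelForCausalLM.from_pretrained(model_name, device_map="cpu")
    return model, tokenizer

With this shape, moving the Space between CPU and GPU hardware needs no further edits to the loader.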
 
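The same portability concern applies to the hardcoded .to("cpu") calls in img_2_text and expression_solver. A sketch of a device-agnostic variant, reusing the very line this commit removed:

device = next(model_llm.parameters()).device  # follow wherever the model was actually placed
inputs = tokenizer_llm(prompt, return_tensors="pt").to(device)

and, analogously, inputs = inputs.to(model_ocr.device) in the OCR path.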