Update app.py
app.py CHANGED
@@ -7,7 +7,6 @@ from pylatexenc.latex2text import LatexNodes2Text
 from transformers import (
     AutoTokenizer,
     AutoModelForCausalLM,
-    BitsAndBytesConfig,
     Qwen2VLForConditionalGeneration,
     AutoProcessor
 )
@@ -39,21 +38,16 @@ def load_ocr_model():
 
 @st.cache_resource(show_spinner=False)
 def load_llm_model():
-    # Load LLM model and tokenizer
-    bnb_config = BitsAndBytesConfig(
-        load_in_4bit=True,
-        bnb_4bit_use_double_quant=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.bfloat16
-    )
+    # Load LLM model and tokenizer for CPU-only execution. The BitsAndBytes config is removed.
     model_name = "deepseek-ai/deepseek-math-7b-instruct"
     tokenizer = AutoTokenizer.from_pretrained(model_name)
+    tokenizer.pad_token = tokenizer.eos_token
+
+    # Load model on CPU (since no CUDA is available)
     model = AutoModelForCausalLM.from_pretrained(
         model_name,
-
-        device_map="auto"
+        device_map="cpu"
     )
-    tokenizer.pad_token = tokenizer.eos_token
     return model, tokenizer
 
 #############################
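For readers who want the changed loader outside the diff, a device-agnostic variant is sketched below. It is not part of this commit: the function name and the dtype choice are assumptions, and the commit itself pins everything to the CPU.

import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

def load_llm_model_flexible():
    # Hypothetical variant, not from app.py: pick CUDA when it exists,
    # otherwise fall back to the CPU path this commit hard-codes.
    device = "cuda" if torch.cuda.is_available() else "cpu"
    model_name = "deepseek-ai/deepseek-math-7b-instruct"
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    tokenizer.pad_token = tokenizer.eos_token  # model ships without a pad token
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map=device,
        # Assumption: bfloat16 on GPU mirrors the removed bnb_4bit_compute_dtype;
        # float32 is the safe default on CPU.
        torch_dtype=torch.bfloat16 if device == "cuda" else torch.float32,
    )
    return model, tokenizer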
@@ -85,7 +79,8 @@ def img_2_text(image, model_ocr, processor_ocr):
         padding=True,
         return_tensors="pt",
     )
-
+    # Move inputs to CPU, since that's our only device available
+    inputs = inputs.to("cpu")
 
     generated_ids = model_ocr.generate(**inputs, max_new_tokens=512)
     generated_ids_trimmed = [
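Hard-coding "cpu" is safe here only because the loaders above now place the models on the CPU. A more drift-proof alternative (a sketch, not in this commit) reads the device off the model itself:

# Sketch, not from app.py: keep inputs on whatever device the OCR model uses,
# so this line needn't change if the loader's device_map changes again.
inputs = inputs.to(model_ocr.device)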
@@ -97,17 +92,15 @@ def img_2_text(image, model_ocr, processor_ocr):
     return output_text[0].split('<|im_end|>')[0]
 
 def expression_solver(expression, model_llm, tokenizer_llm):
-    device = next(model_llm.parameters()).device
     prompt = f"""You are a helpful math assistant. Please analyze the problem carefully and provide a step-by-step solution.
     - If the problem is an equation, solve for the unknown variable(s).
     - If it is an expression, simplify it fully.
     - If it is a word problem, explain how you arrive at the result.
     - Output final value, either True or False in case of expressions where you have to verify, or the value of variables in expressions where you have to solve in a <ANS> </ANS> tag with no other text in it.
-
     Problem: {expression}
     Answer:
     """
-    inputs = tokenizer_llm(prompt, return_tensors="pt").to(device)
+    inputs = tokenizer_llm(prompt, return_tensors="pt").to("cpu")
     outputs = model_llm.generate(
         **inputs,
         max_new_tokens=512,
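The hunk stops before the decode step, so how the <ANS> tag is consumed isn't shown. Given the convention the prompt establishes, extraction could look like this sketch; extract_answer is a hypothetical helper, not from app.py:

import re

def extract_answer(decoded_text):
    # Hypothetical helper: pull the final value out of the <ANS> ... </ANS>
    # tag the prompt asks the model to emit; None if the tag never appears.
    match = re.search(r"<ANS>(.*?)</ANS>", decoded_text, re.DOTALL)
    return match.group(1).strip() if match else None

# Example: extract_answer("... steps ... <ANS>x = 4</ANS>") returns "x = 4".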