Update app.py
app.py CHANGED
```diff
@@ -6,8 +6,6 @@ import gradio as gr
 import os
 import logging
 from unsloth import FastLanguageModel
-
-# Set up logging
 logging.basicConfig(
     level=logging.DEBUG,  # Set the logging level to DEBUG to capture all messages
     format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
@@ -16,9 +14,7 @@ logging.basicConfig(
     ]
 )
 logger = logging.getLogger(__name__)
-
 READ_HF = os.environ["read_hf"]
-
 alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
 
 ### Instruction:
@@ -29,7 +25,6 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
 
 ### Response:
 {}"""
-
 string = '''
 You are an AI assistant tasked with managing inventory based on user instructions. You must meticulously analyze each user request to determine the appropriate action and execute it with the correct parameters.
 
@@ -74,14 +69,11 @@ You are an AI assistant tasked with managing inventory based on user instruction
 
 Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
 '''
-
 @spaces.GPU()
-num_elements = (5 * 1024 * 1024) // 4
-# Create a tensor with the calculated number of elements
-tensor = torch.randn(num_elements, dtype=torch.float32)
-# Move the tensor to the GPU
-tensor_gpu = tensor.to('cuda')
 def chunk_it(inventory_list, user_input_text):
+    num_elements = (5 * 1024 * 1024) // 4
+    tensor = torch.randn(num_elements, dtype=torch.float32)
+    tensor_gpu = tensor.to('cuda')
     logger.info("Loading model and tokenizer...")
     try:
         model, tokenizer = FastLanguageModel.from_pretrained(
@@ -95,7 +87,6 @@ def chunk_it(inventory_list, user_input_text):
     except Exception as e:
         logger.error(f"Failed to load model and tokenizer: {e}")
         raise
-
     logger.info("Enabling native 2x faster inference...")
     try:
         FastLanguageModel.for_inference(model)
@@ -103,21 +94,18 @@
     except Exception as e:
         logger.error(f"Failed to enable native inference: {e}")
         raise
-
     formatted_prompt = alpaca_prompt.format(
         string + inventory_list,  # instruction
         user_input_text,  # input
         "",  # output - leave this blank for generation!
     )
     logger.debug(f"Formatted prompt: {formatted_prompt}")
-
     try:
         inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
         logger.debug(f"Tokenized inputs: {inputs}")
     except Exception as e:
         logger.error(f"Failed to tokenize inputs: {e}")
         raise
-
     logger.info("Generating output...")
     try:
         outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
@@ -125,22 +113,17 @@
     except Exception as e:
         logger.error(f"Failed to generate output: {e}")
         raise
-
     try:
         reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
         logger.debug(f"Decoded output: {reply}")
     except Exception as e:
         logger.error(f"Failed to decode output: {e}")
         raise
-
-    # Uncomment the following lines if further processing of the reply is needed
     # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
     # match = re.search(pattern, reply[0], re.DOTALL)
     # reply = match.group(1).strip()
-
     logger.debug(f"Final reply: {reply}")
     return reply
-
 # Interface for inputs
 iface = gr.Interface(
     fn=chunk_it,
@@ -151,10 +134,9 @@
     outputs=gr.Textbox(label="output", lines=23),
     title="Testing",
 )
-
 logger.info("Launching Gradio interface...")
 try:
     iface.launch(inline=False)
     logger.info("Gradio interface launched.")
 except Exception as e:
-    logger.error(f"Failed to launch Gradio interface: {e}")
+    logger.error(f"Failed to launch Gradio interface: {e}")
```
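The response-extraction lines stay commented out, so `chunk_it` returns the full decoded sequence (prompt plus completion) rather than the answer alone. If that post-processing were re-enabled, it would look roughly like the sketch below; `extract_response` is a hypothetical helper built from the commented pattern, and it assumes the decoded text still contains the `### Response:` header and the `<|end_of_text|>` marker:

```python
import re

def extract_response(decoded: str) -> str:
    # Mirror the regex left commented in app.py: capture everything between
    # the "### Response:" header and the end-of-text marker.
    match = re.search(r"### Response:\n(.*?)<\|end_of_text\|>", decoded, re.DOTALL)
    # Fall back to the raw decoded text if the markers are absent.
    return match.group(1).strip() if match else decoded
```

One caveat: `batch_decode(..., skip_special_tokens=True)` in the diff strips special tokens such as `<|end_of_text|>`, so the commented regex as written would likely never match; the fallback branch covers that case.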