Spaces:
Sleeping
Sleeping
File size: 6,073 Bytes
686b9bf 55f4e8e f339bab 5f52293 69f2e98 67b1882 69f2e98 0222e83 67b1882 686b9bf 69f2e98 c974ae6 6b2c16b 5f52293 67b1882 d5262d8 0222e83 67b1882 d5262d8 f506cc8 d5262d8 f506cc8 d5262d8 67b1882 c974ae6 686b9bf 55f4e8e 686b9bf 5f52293 686b9bf 281c481 69f2e98 686b9bf 55f4e8e 67b1882 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
import torch
import spaces
import re
from transformers import AutoTokenizer, AutoModelForCausalLM
import gradio as gr
import os
import logging
from unsloth import FastLanguageModel
# Logging configuration: every record from DEBUG upwards is written to the
# console through a single stream handler, using a timestamped format.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
_console_handler = logging.StreamHandler()
logging.basicConfig(
    handlers=[_console_handler],
    format=_LOG_FORMAT,
    level=logging.DEBUG,
)

# Logger named after this module; used by the rest of the file.
logger = logging.getLogger(__name__)
# Access token read from the "read_hf" environment variable. Indexing
# os.environ directly means a missing variable raises KeyError at import time.
# The value is passed as `token=` when chunk_it loads the model.
READ_HF = os.environ["read_hf"]
# Prompt template with three positional `{}` slots. chunk_it fills them with
# str.format in this order: instruction, input, response. The response slot is
# filled with an empty string so the model generates the text that follows it.
alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
### Instruction:
{}
### Input:
{}
### Response:
{}"""
# Fixed instruction text for the inventory assistant. chunk_it concatenates the
# caller-supplied `inventory_list` onto the end of this text
# (`string + inventory_list`) and uses the result as the instruction slot of
# alpaca_prompt. The text refers to a "provided Item List" that is not part of
# this literal; presumably the appended inventory_list supplies it -- confirm.
# NOTE(review): the name `string` is also the name of a stdlib module; that
# module is not imported in the visible code, so there is no clash here.
string = '''
You are an AI assistant tasked with managing inventory based on user instructions. You must meticulously analyze each user request to determine the appropriate action and execute it with the correct parameters.
**Here's your step-by-step thought process:**
1. **Identify the Function:** Carefully examine the user's input to determine the primary function they want to perform. The available functions are:
- `transaction`: Record a new item transaction.
- `last n days transactions`: Retrieve transaction records within a specific timeframe.
- `view inventory`: View inventory details for a specific category and risk level.
- `generate report`: Generate an inventory report.
2. **Extract Parameters:** Once you've identified the function, carefully extract the necessary parameters from the user's input. Each function requires specific parameters:
**`transaction`:**
- `ItemName`: (string) **Must be an exact match from the provided Item List.**
- `ItemQt`: (integer) The quantity of the item.
- `Type`: (string) "sale", "purchase", or "return".
**`last n days transactions`:**
- `ItemCategory`: (string) **Must be from the provided Item Category List.**
- `Duration`: (integer) Number of days (convert weeks, months, years to days).
**`view inventory`:**
- `ItemCategory`: (string) **Must be from the provided Item Category List.**
- `RiskType`: (string) "overstock", "understock", or "Null" (if risk inventory is not asked), or "All" for both overstock and understock.
**`generate report`:**
- `ItemCategory`: (string) **Must be from the provided Item Category List.**
- `Duration`: (integer) Number of days (convert weeks, months, years to days).
- `ReportType`: (string): "profit", "revenue", "inventory", or "Null" (for all reports).
3. **Validate Inputs:** Before proceeding, validate the extracted parameters:
- **ItemName:** Ensure the `ItemName` is an exact match from the provided Item List.
- **ItemCategory:** Ensure the `ItemCategory` is from the provided Category List.
- **Data Types:** Verify that all parameters are of the correct data type (string or integer).
4. **Output in JSON:** Always format your response as a JSON object.
**Additional Notes:**
- Pay close attention to the case and spelling of function names and parameters.
Category List : ["Dairy & Eggs", "Beverages & Snacks", "Cleaning & Hygiene", "Grains & Staples", "Personal Care", "Other"]
'''
# Module-level allocation of a 5 MiB float32 tensor (4 bytes per element) that
# is then copied to the CUDA device at import time.
# NOTE(review): presumably a start-up touch of the GPU for the Space -- confirm
# it is still needed; the tensor is not used anywhere else in the visible code.
num_elements = (5 * 1024 * 1024) // 4
# Create a tensor with the calculated number of elements
tensor = torch.randn(num_elements, dtype=torch.float32)
# Move the tensor to the GPU
tensor_gpu = tensor.to('cuda')


# A decorator must be immediately followed by the definition it decorates, so
# it is placed directly on chunk_it (statements in between are a SyntaxError).
@spaces.GPU()
def chunk_it(inventory_list, user_input_text):
    """Run the inventory-assistant model on a single user request.

    Loads the model and tokenizer, switches the model to inference mode,
    builds the prompt from ``alpaca_prompt``, generates up to 216 new tokens
    and decodes the generated sequences.

    Args:
        inventory_list: Text appended to the fixed instruction text
            (``string``) to form the instruction slot of the prompt.
        user_input_text: The user's request; used as the input slot of the
            prompt.

    Returns:
        The result of ``tokenizer.batch_decode`` on the generated sequences
        with special tokens skipped. One prompt is submitted per call.

    Raises:
        Exception: Any error raised while loading, tokenizing, generating or
            decoding is logged and then re-raised unchanged.
    """
    # NOTE(review): the model is loaded on every call; kept as in the original.
    logger.info("Loading model and tokenizer...")
    try:
        model, tokenizer = FastLanguageModel.from_pretrained(
            model_name="VanguardAI/CoT_multi_llama_LoRA_4bit",
            max_seq_length=2048,
            dtype=torch.bfloat16,
            load_in_4bit=True,
            token=READ_HF,
        )
        logger.info("Model and tokenizer loaded.")
    except Exception as e:
        logger.error(f"Failed to load model and tokenizer: {e}")
        raise

    logger.info("Enabling native 2x faster inference...")
    try:
        FastLanguageModel.for_inference(model)
        logger.info("Inference enabled.")
    except Exception as e:
        logger.error(f"Failed to enable native inference: {e}")
        raise

    formatted_prompt = alpaca_prompt.format(
        string + inventory_list,  # instruction
        user_input_text,  # input
        "",  # output - leave this blank for generation!
    )
    logger.debug(f"Formatted prompt: {formatted_prompt}")

    try:
        inputs = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
        logger.debug(f"Tokenized inputs: {inputs}")
    except Exception as e:
        logger.error(f"Failed to tokenize inputs: {e}")
        raise

    logger.info("Generating output...")
    try:
        outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
        logger.info("Output generated.")
    except Exception as e:
        logger.error(f"Failed to generate output: {e}")
        raise

    try:
        reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
        logger.debug(f"Decoded output: {reply}")
    except Exception as e:
        logger.error(f"Failed to decode output: {e}")
        raise

    # Uncomment the following lines if further processing of the reply is needed
    # pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
    # match = re.search(pattern, reply[0], re.DOTALL)
    # reply = match.group(1).strip()
    logger.debug(f"Final reply: {reply}")
    return reply
# Interface for inputs.
# Gradio hands the input components' values to `fn` positionally, so the
# textboxes are listed in the same order as chunk_it's parameters
# (inventory_list first, then user_input_text). Listing them the other way
# round would feed the user's text in as the inventory list and vice versa.
iface = gr.Interface(
    fn=chunk_it,
    inputs=[
        gr.Textbox(label="inventory_list", lines=5),
        gr.Textbox(label="user_input_text", lines=3),
    ],
    outputs=gr.Textbox(label="output", lines=23),
    title="Testing",
)

logger.info("Launching Gradio interface...")
try:
    iface.launch(inline=False)
    logger.info("Gradio interface launched.")
except Exception as e:
    # A launch failure is logged and not re-raised, as in the original.
    logger.error(f"Failed to launch Gradio interface: {e}")
|