VanguardAI committed on
Commit
c974ae6
·
verified ·
1 Parent(s): 4a19484

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -37
app.py CHANGED
@@ -6,35 +6,18 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
6
  from peft import LoraConfig, PeftModel, get_peft_model
7
  import gradio as gr
8
 
9
- # Load tokenizer
10
- tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters")
11
-
12
- # Configuration for 4-bit quantization
13
- bnb_config = BitsAndBytesConfig(
14
- load_in_4bit=True,
15
- bnb_4bit_use_double_quant=True,
16
- bnb_4bit_quant_type="nf4",
17
- bnb_4bit_compute_dtype=torch.bfloat16
18
- )
19
-
20
- # Load base model with quantization (replace 'your-username' if needed)
21
- base_model = AutoModelForCausalLM.from_pretrained(
22
- "meta-llama/Meta-Llama-3-8B-Instruct", # Replace with actual base model
23
- quantization_config=bnb_config,
24
- use_auth_token=HF_TOKEN,
25
- )
26
-
27
- # Apply LoRA adapters
28
- peft_config = LoraConfig(
29
- r=16,
30
- lora_alpha=16,
31
- target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
32
- lora_dropout=0,
33
- bias="none",
34
- task_type="CAUSAL_LM"
35
- )
36
-
37
- model = PeftModel.from_pretrained(base_model, "VanguardAI/BhashiniLLaMa3-8B_LoRA_Adapters", config=peft_config)
38
 
39
  condition = '''
40
  ALWAYS provide output in a JSON format.
@@ -51,7 +34,7 @@ alpaca_prompt = """Below is an instruction that describes a task, paired with an
51
  {}"""
52
 
53
 
54
- @spaces.GPU(duration=300)
55
  def chunk_it(inventory_list, user_input_text):
56
  model.to('cuda')
57
  inputs = tokenizer(
@@ -93,10 +76,7 @@ def chunk_it(inventory_list, user_input_text):
93
  ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)
94
 
95
  The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
96
- ''' + inventory_list +
97
- '''
98
- ALWAYS provide output in a JSON format.
99
- ''', # instruction
100
  user_input_text, # input
101
  "", # output - leave this blank for generation!
102
  )
@@ -105,8 +85,12 @@ def chunk_it(inventory_list, user_input_text):
105
  # Generation with a longer max_length and better sampling
106
  outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
107
 
108
- content = tokenizer.batch_decode(outputs, skip_special_tokens=True)
109
- return content[0]
 
 
 
 
110
 
111
  # Interface for inputs
112
  iface = gr.Interface(
@@ -116,7 +100,7 @@ iface = gr.Interface(
116
  gr.Textbox(label="inventory_list", lines=5)
117
  ],
118
  outputs="text",
119
- title="Formatter Pro",
120
  )
121
 
122
  iface.launch(inline=False)
 
6
  from peft import LoraConfig, PeftModel, get_peft_model
7
  import gradio as gr
8
 
9
+ tokenizer = AutoTokenizer.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_16bit_LoRA_Adapters", trust_remote_code=True)
10
+ quantization_config = BitsAndBytesConfig(
11
+ load_in_4bit=True,
12
+ bnb_4bit_use_double_quant=True,
13
+ bnb_4bit_quant_type="nf4",
14
+ bnb_4bit_compute_dtype=torch.float16)
15
+ model = AutoModelForCausalLM.from_pretrained("VanguardAI/BhashiniLLaMa3-8B_16bit_LoRA_Adapters",
16
+ quantization_config=quantization_config,
17
+ torch_dtype =torch.bfloat16,
18
+ low_cpu_mem_usage=True,
19
+ use_safetensors=True,
20
+ trust_remote_code=True)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
 
22
  condition = '''
23
  ALWAYS provide output in a JSON format.
 
34
  {}"""
35
 
36
 
37
+ @spaces.GPU()
38
  def chunk_it(inventory_list, user_input_text):
39
  model.to('cuda')
40
  inputs = tokenizer(
 
76
  ReportType (string: "profit", "revenue", "inventory", or "Null" for all reports)
77
 
78
  The ItemName must always be matched from the below list of names, EXCEPT for when the Function is "new items".
79
+ ''' + inventory_list + condition, # instruction
 
 
 
80
  user_input_text, # input
81
  "", # output - leave this blank for generation!
82
  )
 
85
  # Generation with a longer max_length and better sampling
86
  outputs = model.generate(**inputs, max_new_tokens=216, use_cache=True)
87
 
88
+ reply = tokenizer.batch_decode(outputs, skip_special_tokens=True)
89
+ pattern = r"### Response:\n(.*?)<\|end_of_text\|>"
90
+ # Search for the pattern in the text
91
+ match = re.search(pattern, reply[0], re.DOTALL) # re.DOTALL allows '.' to match newlines
92
+ reply = match.group(1).strip()
93
+ return reply
94
 
95
  # Interface for inputs
96
  iface = gr.Interface(
 
100
  gr.Textbox(label="inventory_list", lines=5)
101
  ],
102
  outputs="text",
103
+ title="Bhashini_Ki",
104
  )
105
 
106
  iface.launch(inline=False)