GIGAParviz committed on
Commit 4b4114f · verified · 1 Parent(s): cf98681

Update app.py

Files changed (1): app.py +37 -37
app.py CHANGED
@@ -1,45 +1,44 @@
 import gradio as gr
-
-
-import transformers
+from unsloth import FastLanguageModel
+from transformers import BitsAndBytesConfig
 import torch
 
-model_id = "unsloth/llama-2-7b-bnb-4bit"
-
-pipeline = transformers.pipeline(
-    "text-generation",
-    model=model_id,
-    model_kwargs={"torch_dtype": torch.bfloat16},
-    device="cpu",
+# Model configuration
+model_name = "unsloth/llama-3-8b-bnb-4bit"
+max_seq_length = 512
+dtype = None
+load_in_4bit = True
+
+# Load model and tokenizer
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=model_name,
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    device_map="cpu",
+    load_in_4bit=load_in_4bit,
 )
 
-
+# The model returned by from_pretrained is already placed by device_map="cpu";
+# re-wrapping it or calling .to() is unnecessary for a 4-bit checkpoint.
+FastLanguageModel.for_inference(model)
+
+# Define Alpaca prompt format
+alpaca_prompt = """
+### Instruction:
+{0}
+### Input:
+{1}
+### Response:
+{2}
+"""
+
+# Function to generate response
 def generate_response(user_input):
-    messages = [
-        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
-        {"role": "user", "content": str(user_input)},
-    ]
-
-    prompt = pipeline.tokenizer.apply_chat_template(
-        messages,
-        tokenize=False,
-        add_generation_prompt=True
-    )
-
-    terminators = [
-        pipeline.tokenizer.eos_token_id,
-        pipeline.tokenizer.convert_tokens_to_ids("<|eot_id|>")
-    ]
-
-    outputs = pipeline(
-        prompt,
-        max_new_tokens=256,
-        eos_token_id=terminators,
-        do_sample=True,
-        temperature=0.6,
-        top_p=0.9,
-    )
-    return outputs[0]
+    prompt = alpaca_prompt.format(user_input, "", "")
+    inputs = tokenizer([prompt], return_tensors="pt").to("cpu")
+    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
+    response = tokenizer.batch_decode(outputs, skip_special_tokens=True)
+    return response[0]
 
 # Gradio interface
 iface = gr.Interface(
@@ -50,4 +49,5 @@ iface = gr.Interface(
     description="Ask a question in Persian or English."
 )
 
-iface.launch()
+# Launch the interface
+iface.launch()
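
Note on the new inference path: model.generate returns the prompt tokens followed by the completion, so returning response[0] hands the whole Alpaca template back to the Gradio UI. Below is a minimal sketch of a variant that decodes only the newly generated tokens; it assumes the model, tokenizer, and alpaca_prompt defined in app.py above. Also worth noting, as a general caveat rather than something this commit addresses: bitsandbytes 4-bit checkpoints typically expect a CUDA GPU, so the device_map="cpu" configuration may be slow or unsupported depending on the installed bitsandbytes version.

# Sketch: return only the completion, not the echoed prompt.
# Assumes model, tokenizer, and alpaca_prompt from app.py above.
def generate_response(user_input):
    prompt = alpaca_prompt.format(user_input, "", "")
    inputs = tokenizer([prompt], return_tensors="pt").to("cpu")
    outputs = model.generate(**inputs, max_new_tokens=64, use_cache=True)
    # outputs[0] = prompt tokens + completion; slice off the prompt part
    new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True)

With this variant, generate_response("What is a language model?") yields just the model's answer text, which is usually what the Gradio textbox should display.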