BoburAmirov committed
Commit e0202e2
1 Parent(s): 5dc8399

model load class updated

Files changed (1)
  1. app.py +12 -22
app.py CHANGED
@@ -1,24 +1,17 @@
 import torch
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
+from transformers import AutoModelForCausalLM, AutoTokenizer, AutoPeftModelForCausalLM
 import gradio as gr
 
-# Load the base model and tokenizer
-base_model_path = "NousResearch/Llama-2-7b-chat-hf" # Path to the base model
-tokenizer_path = "BoburAmirov/test-llama-uz" # Path to the tokenizer
+# Load the fine-tuned model and tokenizer
+model_path = "BoburAmirov/test-llama-uz" # Adjust this to the path where your fine-tuned model is saved
 
-# Load the tokenizer
-tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
+model = AutoPeftModelForCausalLM.from_pretrained(model_path, device_map='auto')
+tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
+
+# Ensure the tokenizer settings match those used during training
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.padding_side = "right"
 
-# Load the base model
-base_model = AutoModelForCausalLM.from_pretrained(base_model_path)
-
-# Load the adapter
-adapter_path = "BoburAmirov/test-llama-uz/adapter_model.safetensors"
-model = PeftModel.from_pretrained(base_model, adapter_path)
-
 # Set the model to evaluation mode
 model.eval()
 
@@ -30,7 +23,7 @@ def generate_text(input_prompt):
     with torch.no_grad():
         output = model.generate(
             input_ids,
-            max_length=200, # Adjust max_length as needed
+            max_length=400, # Adjust max_length as needed
             num_return_sequences=1,
             temperature=0.7, # Control randomness
             top_p=0.9, # Control diversity
@@ -42,16 +35,13 @@ def generate_text(input_prompt):
     return generated_text
 
 # Create a Gradio interface
-interface = gr.Interface(
+iface = gr.Interface(
     fn=generate_text,
     inputs=gr.inputs.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs="text",
-    title="Text Generation with LLaMA-2",
-    description="Enter a prompt and get generated text from the fine-tuned LLaMA-2 model."
+    title="Text Generation with LLaMA",
+    description="Generate text using a fine-tuned LLaMA model."
 )
 
-# Launch the Gradio interface
-
-
 if __name__ == "__main__":
-    interface.launch(server_name="0.0.0.0", server_port=7860)
+    iface.launch(server_name="0.0.0.0", server_port=7860)
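Note on the updated loading path: transformers does not export AutoPeftModelForCausalLM; that class is provided by the peft library, so the new import line will raise an ImportError as written. Below is a minimal sketch of the same loading flow with the import taken from peft instead, assuming the adapter repo "BoburAmirov/test-llama-uz" contains both the PEFT adapter and tokenizer files; the prompt string, the do_sample flag, and the smoke-test printout are illustrative additions, not part of app.py.

# Minimal sketch of the revised loading path (assumptions noted above).
import torch
from peft import AutoPeftModelForCausalLM  # lives in peft, not transformers
from transformers import AutoTokenizer

model_path = "BoburAmirov/test-llama-uz"

# Resolves the base model named in the adapter config and applies the LoRA weights on top.
model = AutoPeftModelForCausalLM.from_pretrained(model_path, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"
model.eval()

# Illustrative smoke test with the same sampling settings as app.py.
prompt = "Salom, dunyo!"  # hypothetical prompt
input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
with torch.no_grad():
    output = model.generate(
        input_ids,
        max_length=400,
        num_return_sequences=1,
        temperature=0.7,
        top_p=0.9,
        do_sample=True,  # added here so temperature/top_p take effect
    )
print(tokenizer.decode(output[0], skip_special_tokens=True))

Separately, gr.inputs.Textbox is the legacy Gradio input API; the gr.inputs namespace was removed in Gradio 4.x, where gr.Textbox(lines=2, placeholder="...") is the current equivalent.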