BoburAmirov committed
Commit 5dc8399 · 1 Parent(s): c8ddca9

update script

Files changed (2)
  1. app.py +21 -11
  2. requirements.txt +2 -1
app.py CHANGED
@@ -1,17 +1,24 @@
 import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 import gradio as gr
 
-# Load the fine-tuned model and tokenizer
-model_path = "BoburAmirov/test-llama-uz" # Adjust this to the path where your fine-tuned model is saved
+# Load the base model and tokenizer
+base_model_path = "NousResearch/Llama-2-7b-chat-hf" # Path to the base model
+tokenizer_path = "BoburAmirov/test-llama-uz" # Path to the tokenizer
 
-model = AutoModelForCausalLM.from_pretrained(model_path, device_map='auto')
-tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
-
-# Ensure the tokenizer settings match those used during training
+# Load the tokenizer
+tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, trust_remote_code=True)
 tokenizer.pad_token = tokenizer.eos_token
 tokenizer.padding_side = "right"
 
+# Load the base model
+base_model = AutoModelForCausalLM.from_pretrained(base_model_path)
+
+# Load the adapter
+adapter_path = "BoburAmirov/test-llama-uz/adapter_model.safetensors"
+model = PeftModel.from_pretrained(base_model, adapter_path)
+
 # Set the model to evaluation mode
 model.eval()
 
@@ -23,7 +30,7 @@ def generate_text(input_prompt):
     with torch.no_grad():
         output = model.generate(
            input_ids,
-            max_length=400, # Adjust max_length as needed
+            max_length=200, # Adjust max_length as needed
            num_return_sequences=1,
            temperature=0.7, # Control randomness
            top_p=0.9, # Control diversity
@@ -35,13 +42,16 @@ def generate_text(input_prompt):
     return generated_text
 
 # Create a Gradio interface
-iface = gr.Interface(
+interface = gr.Interface(
     fn=generate_text,
     inputs=gr.inputs.Textbox(lines=2, placeholder="Enter your prompt here..."),
     outputs="text",
-    title="Text Generation with LLaMA",
-    description="Generate text using a fine-tuned LLaMA model."
+    title="Text Generation with LLaMA-2",
+    description="Enter a prompt and get generated text from the fine-tuned LLaMA-2 model."
 )
 
+# Launch the Gradio interface
+
+
 if __name__ == "__main__":
-    iface.launch(server_name="0.0.0.0", server_port=7860)
+    interface.launch(server_name="0.0.0.0", server_port=7860)
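
A note on the adapter path (not part of the commit): peft's PeftModel.from_pretrained resolves a Hub repo id or a local directory that contains adapter_config.json plus the adapter weights, rather than a direct path to adapter_model.safetensors, so the new adapter_path may need to point at the repo root instead of the weights file. A minimal sketch of that pattern, assuming BoburAmirov/test-llama-uz keeps its adapter files at the top level:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_model_path = "NousResearch/Llama-2-7b-chat-hf"
adapter_repo = "BoburAmirov/test-llama-uz"  # assumed to hold adapter_config.json + adapter_model.safetensors

# Tokenizer and padding settings mirror the committed app.py
tokenizer = AutoTokenizer.from_pretrained(adapter_repo, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token
tokenizer.padding_side = "right"

# Load the base weights, then attach the fine-tuned adapter on top
base_model = AutoModelForCausalLM.from_pretrained(base_model_path, device_map="auto")
model = PeftModel.from_pretrained(base_model, adapter_repo)  # repo/dir, not the .safetensors file
model.eval()

device_map="auto" is carried over from the pre-commit version of app.py; everything else follows the new code.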
requirements.txt CHANGED
@@ -1,3 +1,4 @@
 torch
 transformers
-gradio
+gradio
+peft
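
One follow-up observation, also not part of the commit: requirements.txt leaves all four packages unpinned, while app.py still calls the legacy gr.inputs.Textbox API, which was removed in Gradio 4.x. A hypothetical pin like the one below would keep the Space on a release where that API still exists until the app migrates to gr.Textbox:

torch
transformers
gradio<4  # hypothetical pin: gr.inputs.Textbox no longer exists in Gradio 4.x
peft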