kalekarnn committed on
Commit d2e30f5 · verified · 1 Parent(s): b9fce25

Update app.py

Files changed (1)
  1. app.py +59 -131
app.py CHANGED
@@ -1,143 +1,71 @@
- from datasets import load_dataset, Dataset
- from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
- from transformers import TrainingArguments
- from trl import SFTTrainer, SFTConfig
- from peft import LoraConfig, prepare_model_for_kbit_training
  import torch

- # Configure quantization
- bnb_config = BitsAndBytesConfig(
-     load_in_4bit=True,
-     bnb_4bit_quant_type="nf4",
-     bnb_4bit_compute_dtype=torch.float16,
-     bnb_4bit_use_double_quant=True,
- )
-
- # Load model and tokenizer
  model_name = "microsoft/phi-2"
- model = AutoModelForCausalLM.from_pretrained(
      model_name,
-     quantization_config=bnb_config,
-     device_map="auto",
      trust_remote_code=True
  )
- model.config.use_cache = False
-
- # Load tokenizer
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
- tokenizer.pad_token = tokenizer.eos_token
-
- # Prepare model for k-bit training
- model = prepare_model_for_kbit_training(model)
-
- # Configure LoRA
- peft_config = LoraConfig(
-     r=16,
-     lora_alpha=32,
-     lora_dropout=0.05,
-     bias="none",
-     task_type="CAUSAL_LM",
-     target_modules=["q_proj", "k_proj", "v_proj", "dense"]
- )
-
- # Load and preprocess dataset
- ds = load_dataset("OpenAssistant/oasst1")
- train_dataset = ds['train']
-
- def format_conversation(example):
-     """Format the conversation for instruction fine-tuning"""
-     # Only process root messages (start of conversations)
-     if example["role"] == "prompter" and example["parent_id"] is None:
-         conversation = []
-         current_msg = example
-         conversation.append(("Human", current_msg["text"]))
-
-         # Follow the conversation thread
-         current_id = current_msg["message_id"]
-         while current_id in message_children:
-             # Get the next message in conversation
-             next_msg = message_children[current_id]
-             if next_msg["role"] == "assistant":
-                 conversation.append(("Assistant", next_msg["text"]))
-             elif next_msg["role"] == "prompter":
-                 conversation.append(("Human", next_msg["text"]))
-             current_id = next_msg["message_id"]
-
-         if len(conversation) >= 2:  # At least one exchange (human->assistant)
-             formatted_text = ""
-             for speaker, text in conversation:
-                 formatted_text += f"{speaker}: {text}\n\n"
-             return {"text": formatted_text.strip()}
-     return {"text": None}

- # Build message relationships
- print("Building conversation threads...")
- message_children = {}
- for example in train_dataset:
-     if example["parent_id"] is not None:
-         message_children[example["parent_id"]] = example
-
- # Format complete conversations
- print("\nFormatting conversations...")
- processed_dataset = []
- for example in train_dataset:
-     result = format_conversation(example)
-     if result["text"] is not None:
-         processed_dataset.append(result)
-         if len(processed_dataset) % 100 == 0 and len(processed_dataset) > 0:
-             print(f"Found {len(processed_dataset)} valid conversations")
-
- print(f"Final dataset size: {len(processed_dataset)} conversations")
-
- # Convert to Dataset format
- train_dataset = Dataset.from_list(processed_dataset)
-
- # Remove the redundant conversion
- # train_dataset = list(train_dataset)
- # train_dataset = Dataset.from_list(train_dataset)
-
- # Convert to standard dataset for training
- train_dataset = list(train_dataset)
- train_dataset = Dataset.from_list(train_dataset)
-
- # Configure SFT parameters
- sft_config = SFTConfig(
-     output_dir="phi2-finetuned",
-     num_train_epochs=1,
-     max_steps=500,
-     per_device_train_batch_size=4,
-     gradient_accumulation_steps=1,
-     learning_rate=2e-4,
-     weight_decay=0.001,
-     logging_steps=1,
-     logging_strategy="steps",
-     save_strategy="steps",
-     save_steps=100,
-     save_total_limit=3,
-     push_to_hub=False,
-     max_seq_length=512,
-     report_to="none",
  )

- # Initialize trainer
- trainer = SFTTrainer(
-     model=model,
-     train_dataset=train_dataset,  # Changed from dataset to train_dataset
-     peft_config=peft_config,
-     args=sft_config,
  )

- # Train the model
- trainer.train()
-
- # Save the trained model in Hugging Face format
- trainer.save_model("phi2-finetuned-final")
-
- # Save the model in PyTorch format
- model_save_path = "phi2-finetuned-final/model.pt"
- torch.save({
-     'model_state_dict': trainer.model.state_dict(),
-     'config': trainer.model.config,
-     'peft_config': peft_config,
- }, model_save_path)
- print(f"Model saved in PyTorch format at: {model_save_path}")
 
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
  import torch

+ # Load base model and tokenizer
  model_name = "microsoft/phi-2"
+ base_model = AutoModelForCausalLM.from_pretrained(
      model_name,
+     device_map={"": "cpu"},  # Force CPU usage
      trust_remote_code=True
  )
  tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)

+ # Load fine-tuned adapter
+ model = PeftModel.from_pretrained(
+     base_model,
+     "phi2-finetuned-final",
+     device_map={"": "cpu"}  # Force CPU usage
  )

+ def generate_response(message, history):
+     # Format input as instruction-based conversation
+     prompt = "You are a helpful AI assistant. Please provide clear and concise responses.\n\n"
+     for human, assistant in history[-7:]:  # Keep last 7 exchanges for context
+         prompt += f"Instruction: {human}\nResponse: {assistant}\n\n"
+     prompt += f"Instruction: {message}\nResponse:"
+
+     # Generate response with limited length
+     inputs = tokenizer(prompt, return_tensors="pt")
+     with torch.no_grad():
+         outputs = model.generate(
+             **inputs,
+             max_new_tokens=96,  # Limited to 96 tokens
+             max_length=512,  # Keep history context at 512
+             temperature=0.6,
+             num_return_sequences=1,
+             pad_token_id=tokenizer.eos_token_id,
+             do_sample=True,
+             top_p=0.7,
+             min_length=1,
+             eos_token_id=tokenizer.eos_token_id,
+             early_stopping=True,
+             no_repeat_ngram_size=3,
+             repetition_penalty=1.2
+         )
+     response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+     return response.strip()
+
+ # Create Gradio interface
+ css = """
+ .gradio-container {max-width: 1000px !important}
+ .chatbot {min-height: 700px !important}
+ .chat-message {font-size: 16px !important}
+ """
+
+ demo = gr.ChatInterface(
+     generate_response,
+     chatbot=gr.Chatbot(height=700),  # Increased height
+     textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=0.9),
+     title="Phi-2 Conversational Assistant",
+     description="A fine-tuned Phi-2 model for conversational AI",
+     theme="soft",
+     css=css,
+     examples=["Tell me about yourself",
+               "What can you help me with?",
+               "How do you process information?"],
  )

+ if __name__ == "__main__":
+     demo.launch(share=True)
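
A note on serving the adapter: keeping the LoRA weights in a separate PeftModel wrapper adds a small per-forward overhead on CPU, so an optional follow-up is to merge them into the base weights once and serve the merged checkpoint. A minimal sketch, assuming the phi2-finetuned-final adapter directory saved by the removed training script; merge_and_unload() is PEFT's standard merge API, and "phi2-finetuned-merged" is a hypothetical output path:

from transformers import AutoModelForCausalLM
from peft import PeftModel

# Load the base model, attach the adapter, then fold the LoRA deltas into
# the base weights so inference no longer needs the peft runtime.
base = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", trust_remote_code=True)
merged = PeftModel.from_pretrained(base, "phi2-finetuned-final").merge_and_unload()
merged.save_pretrained("phi2-finetuned-merged")  # plain Transformers checkpoint

The app could then load "phi2-finetuned-merged" with AutoModelForCausalLM.from_pretrained and drop the PeftModel step. For a quick smoke test without the UI, the handler can also be called directly, since this tuple-style ChatInterface passes (message, history) with history as a list of (user, assistant) pairs:

print(generate_response("What can you help me with?", history=[]))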