Spaces:

kalekarnn
/

fine-tuned-phi-2-model

Sleeping

App Files Files Community

kalekarnn committed on Mar 22

Commit

d2e30f5

verified ·

1 Parent(s): b9fce25

Update app.py

Browse files

Files changed (1) hide show

app.py +59 -131

app.py CHANGED Viewed

@@ -1,143 +1,71 @@
-from datasets import load_dataset, Dataset
-from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-from transformers import TrainingArguments
-from trl import SFTTrainer, SFTConfig
-from peft import LoraConfig, prepare_model_for_kbit_training
 import torch
-# Configure quantization
-bnb_config = BitsAndBytesConfig(
-    load_in_4bit=True,
-    bnb_4bit_quant_type="nf4",
-    bnb_4bit_compute_dtype=torch.float16,
-    bnb_4bit_use_double_quant=True,
-)
-# Load model and tokenizer
 model_name = "microsoft/phi-2"
-model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    quantization_config=bnb_config,
-    device_map="auto",
     trust_remote_code=True
 )
-model.config.use_cache = False
-# Load tokenizer
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
-tokenizer.pad_token = tokenizer.eos_token
-# Prepare model for k-bit training
-model = prepare_model_for_kbit_training(model)
-# Configure LoRA
-peft_config = LoraConfig(
-    r=16,
-    lora_alpha=32,
-    lora_dropout=0.05,
-    bias="none",
-    task_type="CAUSAL_LM",
-    target_modules=["q_proj", "k_proj", "v_proj", "dense"]
-)
-# Load and preprocess dataset
-ds = load_dataset("OpenAssistant/oasst1")
-train_dataset = ds['train']
-def format_conversation(example):
-    """Format the conversation for instruction fine-tuning"""
-    # Only process root messages (start of conversations)
-    if example["role"] == "prompter" and example["parent_id"] is None:
-        conversation = []
-        current_msg = example
-        conversation.append(("Human", current_msg["text"]))
-        # Follow the conversation thread
-        current_id = current_msg["message_id"]
-        while current_id in message_children:
-            # Get the next message in conversation
-            next_msg = message_children[current_id]
-            if next_msg["role"] == "assistant":
-                conversation.append(("Assistant", next_msg["text"]))
-            elif next_msg["role"] == "prompter":
-                conversation.append(("Human", next_msg["text"]))
-            current_id = next_msg["message_id"]
-        if len(conversation) >= 2:  # At least one exchange (human->assistant)
-            formatted_text = ""
-            for speaker, text in conversation:
-                formatted_text += f"{speaker}: {text}\n\n"
-            return {"text": formatted_text.strip()}
-    return {"text": None}
-# Build message relationships
-print("Building conversation threads...")
-message_children = {}
-for example in train_dataset:
-    if example["parent_id"] is not None:
-        message_children[example["parent_id"]] = example
-# Format complete conversations
-print("\nFormatting conversations...")
-processed_dataset = []
-for example in train_dataset:
-    result = format_conversation(example)
-    if result["text"] is not None:
-        processed_dataset.append(result)
-    if len(processed_dataset) % 100 == 0 and len(processed_dataset) > 0:
-        print(f"Found {len(processed_dataset)} valid conversations")
-print(f"Final dataset size: {len(processed_dataset)} conversations")
-# Convert to Dataset format
-train_dataset = Dataset.from_list(processed_dataset)
-# Remove the redundant conversion
-# train_dataset = list(train_dataset)
-# train_dataset = Dataset.from_list(train_dataset)
-# Convert to standard dataset for training
-train_dataset = list(train_dataset)
-train_dataset = Dataset.from_list(train_dataset)
-# Configure SFT parameters
-sft_config = SFTConfig(
-    output_dir="phi2-finetuned",
-    num_train_epochs=1,
-    max_steps=500,
-    per_device_train_batch_size=4,
-    gradient_accumulation_steps=1,
-    learning_rate=2e-4,
-    weight_decay=0.001,
-    logging_steps=1,
-    logging_strategy="steps",
-    save_strategy="steps",
-    save_steps=100,
-    save_total_limit=3,
-    push_to_hub=False,
-    max_seq_length=512,
-    report_to="none",
 )
-# Initialize trainer
-trainer = SFTTrainer(
-    model=model,
-    train_dataset=train_dataset,  # Changed from dataset to train_dataset
-    peft_config=peft_config,
-    args=sft_config,
 )
-# Train the model
-trainer.train()
-# Save the trained model in Hugging Face format
-trainer.save_model("phi2-finetuned-final")
-# Save the model in PyTorch format
-model_save_path = "phi2-finetuned-final/model.pt"
-torch.save({
-    'model_state_dict': trainer.model.state_dict(),
-    'config': trainer.model.config,
-    'peft_config': peft_config,
-}, model_save_path)
-print(f"Model saved in PyTorch format at: {model_save_path}")

+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
 import torch
+# Load base model and tokenizer
 model_name = "microsoft/phi-2"
+base_model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    device_map={"": "cpu"},  # Force CPU usage
     trust_remote_code=True
 )
 tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+# Load fine-tuned adapter
+model = PeftModel.from_pretrained(
+    base_model,
+    "phi2-finetuned-final",
+    device_map={"": "cpu"}  # Force CPU usage
 )
+def generate_response(message, history):
+    # Format input as instruction-based conversation
+    prompt = "You are a helpful AI assistant. Please provide clear and concise responses.\n\n"
+    for human, assistant in history[-7:]:  # Keep last 7 exchanges for context
+        prompt += f"Instruction: {human}\nResponse: {assistant}\n\n"
+    prompt += f"Instruction: {message}\nResponse:"
+    # Generate response with limited length
+    inputs = tokenizer(prompt, return_tensors="pt")
+    with torch.no_grad():
+        outputs = model.generate(
+            **inputs,
+            max_new_tokens=96,     # Limited to 96 tokens
+            max_length=512,        # Keep history context at 512
+            temperature=0.6,
+            num_return_sequences=1,
+            pad_token_id=tokenizer.eos_token_id,
+            do_sample=True,
+            top_p=0.7,
+            min_length=1,
+            eos_token_id=tokenizer.eos_token_id,
+            early_stopping=True,
+            no_repeat_ngram_size=3,
+            repetition_penalty=1.2
+        )
+    response = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
+    return response.strip()
+# Create Gradio interface
+css = """
+.gradio-container {max-width: 1000px !important}
+.chatbot {min-height: 700px !important}
+.chat-message {font-size: 16px !important}
+"""
+demo = gr.ChatInterface(
+    generate_response,
+    chatbot=gr.Chatbot(height=700),  # Increased height
+    textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=0.9),
+    title="Phi-2 Conversational Assistant",
+    description="A fine-tuned Phi-2 model for conversational AI",
+    theme="soft",
+    css=css,
+    examples=["Tell me about yourself",
+             "What can you help me with?",
+             "How do you process information?"],
 )
+if __name__ == "__main__":
+    demo.launch(share=True)