nikhiljais committed · verified
Commit ea6cbcb · 1 Parent(s): bd1714b

Upload folder using huggingface_hub

Files changed (3)
  1. README.md +35 -12
  2. app.py +103 -0
  3. requirements.txt +6 -0
README.md CHANGED
@@ -1,12 +1,35 @@
- ---
- title: Phi2 QLoRa OASST
- emoji: 🌍
- colorFrom: pink
- colorTo: blue
- sdk: gradio
- sdk_version: 5.20.1
- app_file: app.py
- pinned: false
- ---
-
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+ # Phi-2 Fine-tuned Chat Assistant
+
+ This Space hosts a version of Microsoft's Phi-2 model fine-tuned with QLoRA (Quantized Low-Rank Adaptation). The model has been trained on the OpenAssistant dataset to improve its conversational abilities.
+
+ ## Model Details
+
+ - Base Model: Microsoft Phi-2
+ - Training Method: QLoRA (4-bit quantization)
+ - Dataset: OpenAssistant Conversations Dataset
+ - Fine-tuning parameters (see the config sketch below):
+   - LoRA rank: 16
+   - LoRA alpha: 32
+   - Dropout: 0.1
+   - Target modules: q_proj, v_proj
+
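+ For reference, these hyperparameters map onto a PEFT/bitsandbytes setup roughly as follows. This is a minimal sketch of the assumed training-time configuration (the training script is not part of this commit); the 4-bit loading also explains why `bitsandbytes` appears in `requirements.txt` even though `app.py` loads the base model in fp16:
+
+ ```python
+ import torch
+ from transformers import AutoModelForCausalLM, BitsAndBytesConfig
+ from peft import LoraConfig, get_peft_model
+
+ # 4-bit NF4 quantization of the base model: the "Q" in QLoRA
+ bnb_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16,
+ )
+ base = AutoModelForCausalLM.from_pretrained(
+     "microsoft/phi-2",
+     quantization_config=bnb_config,
+     device_map="auto",
+ )
+
+ # LoRA adapters matching the hyperparameters listed above
+ lora_config = LoraConfig(
+     r=16,
+     lora_alpha=32,
+     lora_dropout=0.1,
+     target_modules=["q_proj", "v_proj"],
+     task_type="CAUSAL_LM",
+ )
+ model = get_peft_model(base, lora_config)
+ model.print_trainable_parameters()
+ ```
+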
+ ## Usage
+
+ Simply type your message in the input box and press Enter. The model will generate a response based on your input. You can also try the example prompts provided below the chat interface.
+
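+ The Space can also be queried programmatically with `gradio_client`. A hypothetical example; substitute the actual Space id for the placeholder (`/chat` is the endpoint `gr.ChatInterface` exposes by default):
+
+ ```python
+ from gradio_client import Client
+
+ # "YOUR_HF_USERNAME/YOUR_SPACE_NAME" is a placeholder, not a real Space id
+ client = Client("YOUR_HF_USERNAME/YOUR_SPACE_NAME")
+ reply = client.predict("What is quantum computing?", api_name="/chat")
+ print(reply)
+ ```
+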
+ ## Features
+
+ - Interactive chat interface
+ - Real-time response generation
+ - Example prompts for quick testing
+ - Configurable generation parameters (temperature, top-p)
+
+ ## Limitations
+
+ - The model may occasionally generate incorrect or inconsistent responses
+ - Response generation time varies with input length and server load
+ - The model's knowledge is limited to its training data
+
+ ## License
+
+ This Space uses Microsoft's Phi-2 model, which remains subject to its original license. The fine-tuning additions are provided under [Your License].
app.py ADDED
@@ -0,0 +1,103 @@
+ import gradio as gr
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ from peft import PeftModel
+ import torch
+
+ # Model configuration
+ MODEL_PATH = "YOUR_HF_USERNAME/YOUR_MODEL_NAME"  # Replace with your model path
+ BASE_MODEL = "microsoft/phi-2"
+
+ class Phi2Chat:
+     def __init__(self):
+         print("Loading tokenizer...")
+         self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
+
+         print("Loading base model...")
+         base_model = AutoModelForCausalLM.from_pretrained(
+             BASE_MODEL,
+             device_map="auto",
+             torch_dtype=torch.float16,
+         )
+
+         print("Loading fine-tuned model...")
+         self.model = PeftModel.from_pretrained(base_model, MODEL_PATH)
+         self.model.eval()
+
+         # ChatML-style prompt template; assumed to match the format used
+         # during fine-tuning
+         self.chat_template = "<|im_start|>user\n{prompt}\n<|im_end|>\n<|im_start|>assistant\n"
+
+     def generate_response(
+         self,
+         prompt: str,
+         max_new_tokens: int = 300,
+         temperature: float = 0.7,
+         top_p: float = 0.9,
+     ) -> str:
+         formatted_prompt = self.chat_template.format(prompt=prompt)
+         inputs = self.tokenizer(formatted_prompt, return_tensors="pt").to(self.model.device)
+
+         with torch.no_grad():
+             output = self.model.generate(
+                 **inputs,
+                 max_new_tokens=max_new_tokens,
+                 temperature=temperature,
+                 top_p=top_p,
+                 do_sample=True,
+                 # Phi-2's tokenizer has no pad token; use EOS to silence
+                 # the generate() warning
+                 pad_token_id=self.tokenizer.eos_token_id,
+             )
+
+         response = self.tokenizer.decode(output[0], skip_special_tokens=True)
+         # Extract only the assistant's turn; fall back to stripping the raw
+         # prompt if the template markers were removed during decoding
+         if "<|im_start|>assistant" in response:
+             response = response.split("<|im_start|>assistant")[-1].split("<|im_end|>")[0].strip()
+         else:
+             response = response.split(prompt)[-1].strip()
+
+         return response
+
+ # Initialize model
+ phi2_chat = Phi2Chat()
+
+ def chat_response(message, history):
+     return phi2_chat.generate_response(message)
+
+ # Create Gradio interface
+ css = """
+ .gradio-container {
+     font-family: 'IBM Plex Sans', sans-serif;
+ }
+ .chat-message {
+     padding: 1rem;
+     border-radius: 0.5rem;
+     margin-bottom: 1rem;
+     background: #f7f7f7;
+ }
+ """
+
+ with gr.Blocks(css=css) as demo:
+     gr.Markdown("# Phi-2 Fine-tuned Chat Assistant")
+     gr.Markdown(
+         "This is a fine-tuned version of Microsoft's Phi-2 model using QLoRA. "
+         "The model has been trained on the OpenAssistant dataset to improve "
+         "its conversational abilities."
+     )
+
+     # Note: the Gradio 4 retry_btn/undo_btn/clear_btn arguments were removed
+     # in Gradio 5; ChatInterface now provides those controls itself.
+     gr.ChatInterface(
+         chat_response,
+         chatbot=gr.Chatbot(height=400),
+         textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
+         title="Chat with Phi-2",
+         description="Have a conversation with the fine-tuned Phi-2 model",
+         theme="soft",
+         examples=[
+             "What is quantum computing?",
+             "Write a Python function to find prime numbers",
+             "Explain the concept of machine learning in simple terms",
+         ],
+     )
+
+ demo.launch()
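
For clarity, this is what the template step in `generate_response` produces for a sample message (a standalone worked example; it reuses the same template string as `Phi2Chat` above):

```python
# ChatML-style prompt as assembled by Phi2Chat.generate_response
chat_template = "<|im_start|>user\n{prompt}\n<|im_end|>\n<|im_start|>assistant\n"
formatted = chat_template.format(prompt="What is quantum computing?")
print(formatted)
# <|im_start|>user
# What is quantum computing?
# <|im_end|>
# <|im_start|>assistant
```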
requirements.txt ADDED
@@ -0,0 +1,6 @@
+ transformers>=4.36.0
+ torch>=2.0.0
+ peft>=0.7.0
+ accelerate>=0.25.0
+ bitsandbytes>=0.41.0
+ gradio>=4.0.0