Spaces:

nikhiljais
/

Phi2-QLoRa-OASST

Runtime error

App Files Files Community

nikhiljais committed on Mar 7

Commit

81dd5ea

verified ·

1 Parent(s): 2e98b71

Upload folder using huggingface_hub

Browse files

Files changed (9) hide show

app.py +153 -111
checkpoints/adapter/adapter_config.json +34 -0
checkpoints/tokenizer/added_tokens.json +40 -0
checkpoints/tokenizer/merges.txt +0 -0
checkpoints/tokenizer/special_tokens_map.json +30 -0
checkpoints/tokenizer/tokenizer.json +0 -0
checkpoints/tokenizer/tokenizer_config.json +326 -0
checkpoints/tokenizer/vocab.json +0 -0
requirements.txt +5 -5

app.py CHANGED Viewed

@@ -1,112 +1,154 @@
-import gradio as gr
-from transformers import AutoModelForCausalLM, AutoTokenizer
-from peft import PeftModel
-import torch
-# Model configuration
-MODEL_PATH = "nikhiljais/Phi2-QLoRa-OSST"
-BASE_MODEL = "microsoft/phi-2"
-class Phi2Chat:
-    def __init__(self):
-        print("Loading tokenizer...")
-        self.tokenizer = AutoTokenizer.from_pretrained(MODEL_PATH)
-        print("Loading base model...")
-        # Modified to use CPU with reduced precision
-        base_model = AutoModelForCausalLM.from_pretrained(
-            BASE_MODEL,
-            device_map="cpu",
-            torch_dtype=torch.float32,
-            low_cpu_mem_usage=True
-        )
-        print("Loading fine-tuned model...")
-        self.model = PeftModel.from_pretrained(base_model, MODEL_PATH)
-        self.model.eval()
-        # Move model to CPU if available memory is limited
-        if torch.cuda.is_available() and torch.cuda.mem_get_info()[0] > 8 * 1024 * 1024 * 1024:  # 8GB
-            self.model = self.model.to("cuda")
-        self.chat_template = """<|im_start|>user
-{prompt}\n<|im_end|>
-<|im_start|>assistant
-"""
-    def generate_response(
-        self,
-        prompt: str,
-        max_new_tokens: int = 300,
-        temperature: float = 0.7,
-        top_p: float = 0.9
-    ) -> str:
-        formatted_prompt = self.chat_template.format(prompt=prompt)
-        inputs = self.tokenizer(formatted_prompt, return_tensors="pt")
-        # Move inputs to the same device as model
-        inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
-        with torch.no_grad():
-            output = self.model.generate(
-                **inputs,
-                max_new_tokens=max_new_tokens,
-                temperature=temperature,
-                top_p=top_p,
-                do_sample=True
-            )
-        response = self.tokenizer.decode(output[0], skip_special_tokens=True)
-        # Extract only the assistant's response
-        try:
-            response = response.split("<|im_start|>assistant\n")[-1].split("<|im_end|>")[0].strip()
-        except:
-            response = response.split(prompt)[-1].strip()
-        return response
-# Initialize model
-phi2_chat = Phi2Chat()
-def chat_response(message, history):
-    response = phi2_chat.generate_response(message)
-    return response
-# Create Gradio interface
-css = """
-.gradio-container {
-    font-family: 'IBM Plex Sans', sans-serif;
-}
-.chat-message {
-    padding: 1rem;
-    border-radius: 0.5rem;
-    margin-bottom: 1rem;
-    background: #f7f7f7;
-}
-"""
-with gr.Blocks(css=css) as demo:
-    gr.Markdown("# Phi-2 Fine-tuned Chat Assistant")
-    gr.Markdown("""
-    This is a fine-tuned version of Microsoft's Phi-2 model using QLoRA.
-    The model has been trained on the OpenAssistant dataset to improve its conversational abilities.
-    """)
-    chatbot = gr.ChatInterface(
-        chat_response,
-        chatbot=gr.Chatbot(height=400),
-        textbox=gr.Textbox(placeholder="Type your message here...", container=False, scale=7),
-        title="Chat with Phi-2",
-        description="Have a conversation with the fine-tuned Phi-2 model",
-        theme="soft",
-        examples=[
-            "What is quantum computing?",
-            "Write a Python function to find prime numbers",
-            "Explain the concept of machine learning in simple terms"
-        ],
-        retry_btn="Retry",
-        undo_btn="Undo",
-        clear_btn="Clear",
-    )
 demo.launch()

+import gradio as gr
+from transformers import AutoModelForCausalLM, AutoTokenizer
+from peft import PeftModel
+import torch
+import os
+# Model configuration
+CHECKPOINT_DIR = "checkpoints"
+BASE_MODEL = "microsoft/phi-2"
+class Phi2Chat:
+    """Chat wrapper around Phi-2 with a LoRA adapter stored in CHECKPOINT_DIR.
+
+    Construction is cheap: nothing is read from disk until ``load_model``
+    is called. ``generate_response`` answers one user message at a time.
+    """
+    def __init__(self):
+        self.tokenizer = None
+        self.model = None
+        # Becomes True only after load_model() has finished successfully.
+        self.is_loaded = False
+        # Prompt layout sent to the model; {prompt} is the user message.
+        self.chat_template = """<|im_start|>user
+{prompt}\n<|im_end|>
+<|im_start|>assistant
+"""
+    def load_model(self):
+        """Load tokenizer, base model and adapter; later calls are no-ops.
+
+        The tokenizer and adapter are read from the "tokenizer" and
+        "adapter" sub-directories of CHECKPOINT_DIR with
+        ``local_files_only=True``; the base weights come from BASE_MODEL.
+
+        Raises:
+            Exception: whatever the underlying loaders raise. The error is
+                printed first and the instance is left unloaded.
+        """
+        if self.is_loaded:
+            return
+        try:
+            print("Loading tokenizer...")
+            tokenizer = AutoTokenizer.from_pretrained(
+                os.path.join(CHECKPOINT_DIR, "tokenizer"),
+                local_files_only=True
+            )
+            print("Loading base model...")
+            base_model = AutoModelForCausalLM.from_pretrained(
+                BASE_MODEL,
+                device_map="cpu",
+                torch_dtype=torch.float32,
+                low_cpu_mem_usage=True
+            )
+            print("Loading fine-tuned model...")
+            model = PeftModel.from_pretrained(
+                base_model,
+                os.path.join(CHECKPOINT_DIR, "adapter"),
+                local_files_only=True
+            )
+            model.eval()
+        except Exception as e:
+            print(f"Error loading model: {e}")
+            # Bare raise keeps the original traceback intact.
+            raise
+        # Moving to GPU is best effort; on failure the model is used as is.
+        if torch.cuda.is_available():
+            try:
+                model = model.to("cuda")
+                print("Model moved to GPU")
+            except Exception as e:
+                print(f"Could not move model to GPU: {e}")
+        # Publish everything at once so a failed load never leaves a
+        # tokenizer without a model behind.
+        self.tokenizer = tokenizer
+        self.model = model
+        self.is_loaded = True
+        print("Model loading completed!")
+    def generate_response(
+        self,
+        prompt: str,
+        max_new_tokens: int = 300,
+        temperature: float = 0.7,
+        top_p: float = 0.9
+    ) -> str:
+        """Generate the assistant's reply to a single user message.
+
+        Args:
+            prompt: User message inserted into the chat template.
+            max_new_tokens: Upper bound on the number of generated tokens.
+            temperature: Sampling temperature passed to ``generate``.
+            top_p: Nucleus-sampling threshold passed to ``generate``.
+
+        Returns:
+            The reply text; a "still loading" notice when the model has not
+            been loaded; or an "Error generating response: ..." message when
+            tokenization or generation fails.
+        """
+        if not self.is_loaded:
+            return "Model is still loading... Please try again in a moment."
+        try:
+            formatted_prompt = self.chat_template.format(prompt=prompt)
+            inputs = self.tokenizer(formatted_prompt, return_tensors="pt")
+            inputs = {k: v.to(self.model.device) for k, v in inputs.items()}
+            with torch.no_grad():
+                output = self.model.generate(
+                    **inputs,
+                    max_new_tokens=max_new_tokens,
+                    temperature=temperature,
+                    top_p=top_p,
+                    do_sample=True
+                )
+            # generate() returns prompt + completion. Decode only the new
+            # tokens so chat markers echoed or invented by the model cannot
+            # make us cut the reply at the wrong place.
+            prompt_length = inputs["input_ids"].shape[-1]
+            completion = self.tokenizer.decode(
+                output[0][prompt_length:],
+                skip_special_tokens=True
+            )
+            # Keep only the first assistant turn.
+            return completion.split("<|im_end|>")[0].strip()
+        except Exception as e:
+            return f"Error generating response: {str(e)}"
+# Initialize model
+phi2_chat = Phi2Chat()
+def loading_message():
+    """Return a fixed notice telling the user that the model is loading."""
+    notice = "Loading the model... This may take a few minutes. Please wait."
+    return notice
+def chat_response(message, history):
+    """Chat callback: answer ``message`` with the shared Phi2Chat instance.
+
+    ``history`` is accepted because the chat UI passes it, but it is not
+    used; every message is answered on its own.
+    """
+    bot = phi2_chat
+    # Lazy load: the first message pays the model-loading cost.
+    if not bot.is_loaded:
+        bot.load_model()
+    reply = bot.generate_response(message)
+    return reply
+# Create Gradio interface
+css = """
+.gradio-container {
+    font-family: 'IBM Plex Sans', sans-serif;
+}
+.chat-message {
+    padding: 1rem;
+    border-radius: 0.5rem;
+    margin-bottom: 1rem;
+    background: #f7f7f7;
+}
+"""
+with gr.Blocks(css=css) as demo:
+    gr.Markdown("# Phi-2 Fine-tuned Chat Assistant")
+    gr.Markdown("""
+    This is a fine-tuned version of Microsoft's Phi-2 model using QLoRA.
+    The model has been trained on the OpenAssistant dataset to improve its conversational abilities.
+    Note: First-time loading may take a few minutes. Please be patient.
+    """)
+    # retry_btn / undo_btn / clear_btn are intentionally not passed: they were
+    # removed from ChatInterface in Gradio 5, and requirements.txt only sets a
+    # lower bound (gradio>=4.44.1), so passing them fails on newer installs.
+    chatbot = gr.ChatInterface(
+        chat_response,
+        chatbot=gr.Chatbot(height=400),
+        textbox=gr.Textbox(
+            placeholder="Type your message here... (Model will load on first message)",
+            container=False,
+            scale=7
+        ),
+        title="Chat with Phi-2",
+        description="Have a conversation with the fine-tuned Phi-2 model",
+        theme="soft",
+        examples=[
+            "What is quantum computing?",
+            "Write a Python function to find prime numbers",
+            "Explain the concept of machine learning in simple terms"
+        ],
+    )
+# Configure queue and launch
+# queue(concurrency_count=...) is rejected by Gradio 4+; the replacement is
+# default_concurrency_limit. One request at a time keeps a single copy of the
+# model busy; at most 10 requests may wait in the queue.
+demo.queue(default_concurrency_limit=1, max_size=10)
 demo.launch()

checkpoints/adapter/adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "microsoft/phi-2",
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_bias": false,
+  "lora_dropout": 0.1,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_rslora": false
+}

checkpoints/tokenizer/added_tokens.json ADDED Viewed

	@@ -0,0 +1,40 @@

+{
+  "\t\t": 50294,
+  "\t\t\t": 50293,
+  "\t\t\t\t": 50292,
+  "\t\t\t\t\t": 50291,
+  "\t\t\t\t\t\t": 50290,
+  "\t\t\t\t\t\t\t": 50289,
+  "\t\t\t\t\t\t\t\t": 50288,
+  "\t\t\t\t\t\t\t\t\t": 50287,
+  "  ": 50286,
+  "   ": 50285,
+  "    ": 50284,
+  "     ": 50283,
+  "      ": 50282,
+  "       ": 50281,
+  "        ": 50280,
+  "         ": 50279,
+  "          ": 50278,
+  "           ": 50277,
+  "            ": 50276,
+  "             ": 50275,
+  "              ": 50274,
+  "               ": 50273,
+  "                ": 50272,
+  "                 ": 50271,
+  "                  ": 50270,
+  "                   ": 50269,
+  "                    ": 50268,
+  "                     ": 50267,
+  "                      ": 50266,
+  "                       ": 50265,
+  "                        ": 50264,
+  "                         ": 50263,
+  "                          ": 50262,
+  "                           ": 50261,
+  "                            ": 50260,
+  "                             ": 50259,
+  "                              ": 50258,
+  "                               ": 50257
+}

checkpoints/tokenizer/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/tokenizer/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,30 @@

+{
+  "bos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<|endoftext|>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

checkpoints/tokenizer/tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

checkpoints/tokenizer/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,326 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "50256": {
+      "content": "<|endoftext|>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50257": {
+      "content": "                               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50258": {
+      "content": "                              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50259": {
+      "content": "                             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50260": {
+      "content": "                            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50261": {
+      "content": "                           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50262": {
+      "content": "                          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50263": {
+      "content": "                         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50264": {
+      "content": "                        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50265": {
+      "content": "                       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50266": {
+      "content": "                      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50267": {
+      "content": "                     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50268": {
+      "content": "                    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50269": {
+      "content": "                   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50270": {
+      "content": "                  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50271": {
+      "content": "                 ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50272": {
+      "content": "                ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50273": {
+      "content": "               ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50274": {
+      "content": "              ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50275": {
+      "content": "             ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50276": {
+      "content": "            ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50277": {
+      "content": "           ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50278": {
+      "content": "          ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50279": {
+      "content": "         ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50280": {
+      "content": "        ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50281": {
+      "content": "       ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50282": {
+      "content": "      ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50283": {
+      "content": "     ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50284": {
+      "content": "    ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50285": {
+      "content": "   ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50286": {
+      "content": "  ",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50287": {
+      "content": "\t\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50288": {
+      "content": "\t\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50289": {
+      "content": "\t\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50290": {
+      "content": "\t\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50291": {
+      "content": "\t\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50292": {
+      "content": "\t\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50293": {
+      "content": "\t\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    },
+    "50294": {
+      "content": "\t\t",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
+    }
+  },
+  "bos_token": "<|endoftext|>",
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "<|endoftext|>",
+  "extra_special_tokens": {},
+  "model_max_length": 2048,
+  "pad_token": "<|endoftext|>",
+  "return_token_type_ids": false,
+  "tokenizer_class": "CodeGenTokenizer",
+  "unk_token": "<|endoftext|>"
+}

checkpoints/tokenizer/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff

requirements.txt CHANGED Viewed

@@ -1,6 +1,6 @@
-transformers>=4.36.0
-torch>=2.0.0
-peft>=0.14
-accelerate>=0.25.0
-gradio>=4.44.1
 scipy

+transformers>=4.36.0
+torch>=2.0.0
+peft>=0.14
+accelerate>=0.25.0
+gradio>=4.44.1
 scipy