Qwen-VL-Chat

Paused

App Files Community

Tonic committed on Nov 14, 2023

Commit

7c96374

1 Parent(s): f465778

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -9

app.py CHANGED Viewed

@@ -21,9 +21,10 @@ model_name = "OpenLLM-France/Claire-7B-0.1"
 tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
 model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
     device_map="auto",
-    torch_dtype=torch.bfloat16,
     load_in_4bit=True                          # For efficient inference, if supported by the GPU card
 )
 # Class to encapsulate the Falcon chatbot
 class FalconChatBot:
@@ -50,22 +51,35 @@ class FalconChatBot:
         return filtered_history
     def predict(self, user_message, assistant_message, history, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
         # Process the history to remove special commands
         processed_history = self.process_history(history)
         # Combine the user and assistant messages into a conversation
-        conversation = f"{self.system_prompt}\nFalcon: {assistant_message if assistant_message else ''} User: {user_message}\nFalcon:\n"
         # Encode the conversation using the tokenizer
         input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)
         # Generate a response using the Falcon model
-        response = model.generate(input_ids=input_ids, max_length=max_length, use_cache=False, early_stopping=False, bos_token_id=model.config.bos_token_id, eos_token_id=model.config.eos_token_id, pad_token_id=model.config.eos_token_id, temperature=0.4, do_sample=True)
         # Decode the generated response to text
         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
-        # Append the Falcon-like conversation to the history
-        self.history.append(conversation)
-        self.history.append(response_text)
-        return response_text
 # Create the Falcon chatbot instance

 tokenizer = transformers.AutoTokenizer.from_pretrained(model_name)
 model = transformers.AutoModelForCausalLM.from_pretrained(model_name,
     device_map="auto",
+    torch_dtype=torch.bfloat16.to("cuda"),
     load_in_4bit=True                          # For efficient inference, if supported by the GPU card
 )
+model = model.to_bettertransformer()
 # Class to encapsulate the Falcon chatbot
 class FalconChatBot:
         return filtered_history
     def predict(self, user_message, assistant_message, history, temperature=0.4, max_new_tokens=700, top_p=0.99, repetition_penalty=1.9):
+        input_ids = input_ids.to(device)
         # Process the history to remove special commands
         processed_history = self.process_history(history)
         # Combine the user and assistant messages into a conversation
+        conversation = f"{self.system_prompt}\n {assistant_message if assistant_message else ''}\n {user_message}\n "
         # Encode the conversation using the tokenizer
         input_ids = tokenizer.encode(conversation, return_tensors="pt", add_special_tokens=False)
         # Generate a response using the Falcon model
+        response = model.generate(
+            input_ids=input_ids,
+            max_length=max_length,
+            use_cache=False,
+            early_stopping=False,
+            bos_token_id=model.config.bos_token_id,
+            eos_token_id=model.config.eos_token_id,
+            pad_token_id=model.config.eos_token_id,
+            temperature=temperature,
+            do_sample=True,
+            max_new_tokens=max_new_tokens,
+            top_p=top_p,
+            repetition_penalty=repetition_penalty
+        )        # Decode the generated response to text
         # Decode the generated response to text
         response_text = tokenizer.decode(response[0], skip_special_tokens=True)
+        # Update and return the history with the new conversation
+        updated_history = processed_history + [{"user": user_message, "assistant": response_text}]
+        return response_text, updated_history
 # Create the Falcon chatbot instance