CreativeWorks committed on
Commit
ab711c8
·
verified ·
1 Parent(s): 0ce19af

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -10
app.py CHANGED
@@ -9,6 +9,14 @@ from threading import Thread
9
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
 
11
 
 
 
 
 
 
 
 
 
12
  DESCRIPTION = '''
13
  <div>
14
  <h1 style="text-align: center;">Meta Llama3 8B</h1>
@@ -59,13 +67,13 @@ terminators = [
59
  ]
60
 
61
  @spaces.GPU(duration=120)
62
- def chat_llama3_8b(message: str,
63
  history: list,
64
  temperature: float,
65
  max_new_tokens: int
66
  ) -> str:
67
  """
68
- Generate a streaming response using the llama3-8b model.
69
  Args:
70
  message (str): The input message.
71
  history (list): The conversation history used by ChatInterface.
@@ -78,13 +86,11 @@ def chat_llama3_8b(message: str,
78
  for user, assistant in history:
79
  conversation.extend([{"from": "human", "value": user}, {"from": "assistant", "value": assistant}])
80
  conversation.append({"from": "human", "value": message})
81
-
82
  input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
83
 
84
  streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
85
-
86
  generate_kwargs = dict(
87
- input_ids= input_ids,
88
  streamer=streamer,
89
  max_new_tokens=max_new_tokens,
90
  do_sample=True,
@@ -92,17 +98,18 @@ def chat_llama3_8b(message: str,
92
  eos_token_id=terminators,
93
  pad_token_id=tokenizer.eos_token_id
94
  )
95
- # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
 
96
  if temperature == 0:
97
  generate_kwargs['do_sample'] = False
98
 
99
  t = Thread(target=model.generate, kwargs=generate_kwargs)
100
  t.start()
101
-
102
  outputs = []
103
  for text in streamer:
 
 
104
  outputs.append(text)
105
- #print(outputs)
106
  yield "".join(outputs)
107
 
108
 
@@ -114,7 +121,7 @@ with gr.Blocks(fill_height=True, css=css) as demo:
114
  gr.Markdown(DESCRIPTION)
115
  #gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
116
  gr.ChatInterface(
117
- fn=chat_llama3_8b,
118
  chatbot=chatbot,
119
  fill_height=True,
120
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
@@ -145,5 +152,5 @@ with gr.Blocks(fill_height=True, css=css) as demo:
145
  gr.Markdown(LICENSE)
146
 
147
  if __name__ == "__main__":
148
- demo.launch(share=True)
149
 
 
9
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
10
 
11
 
12
+ # Read the environment variables for authentication and sharing
13
+ auth_users = os.getenv("GRADIO_AUTH_USERS")
14
+ auth_passwords = os.getenv("GRADIO_AUTH_PASSWORDS")
15
+ # Convert the comma-separated user and password strings into lists
16
+ auth_users = [user.strip() for user in auth_users.split(",")]
17
+ auth_passwords = [password.strip() for password in auth_passwords.split(",")]
18
+ # Build an authentication dictionary mapping users to passwords
19
+ auth_credentials = dict(zip(auth_users, auth_passwords))
20
  DESCRIPTION = '''
21
  <div>
22
  <h1 style="text-align: center;">Meta Llama3 8B</h1>
 
67
  ]
68
 
69
  @spaces.GPU(duration=120)
70
+ def CreativeWorks_Mistral_7b_Chat_V1(message: str,
71
  history: list,
72
  temperature: float,
73
  max_new_tokens: int
74
  ) -> str:
75
  """
76
+ Generate a streaming response using the Mistral model.
77
  Args:
78
  message (str): The input message.
79
  history (list): The conversation history used by ChatInterface.
 
86
  for user, assistant in history:
87
  conversation.extend([{"from": "human", "value": user}, {"from": "assistant", "value": assistant}])
88
  conversation.append({"from": "human", "value": message})
 
89
  input_ids = tokenizer.apply_chat_template(conversation, return_tensors="pt").to(model.device)
90
 
91
  streamer = TextIteratorStreamer(tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
 
92
  generate_kwargs = dict(
93
+ input_ids=input_ids,
94
  streamer=streamer,
95
  max_new_tokens=max_new_tokens,
96
  do_sample=True,
 
98
  eos_token_id=terminators,
99
  pad_token_id=tokenizer.eos_token_id
100
  )
101
+
102
+ # This will enforce greedy generation (do_sample=False) when the temperature is passed 0, avoiding the crash.
103
  if temperature == 0:
104
  generate_kwargs['do_sample'] = False
105
 
106
  t = Thread(target=model.generate, kwargs=generate_kwargs)
107
  t.start()
 
108
  outputs = []
109
  for text in streamer:
110
+ # Remove the unwanted prefix if present
111
+ text = text.replace("<|im_start|>assistant", " ")
112
  outputs.append(text)
 
113
  yield "".join(outputs)
114
 
115
 
 
121
  gr.Markdown(DESCRIPTION)
122
  #gr.DuplicateButton(value="Duplicate Space for private use", elem_id="duplicate-button")
123
  gr.ChatInterface(
124
+ fn=CreativeWorks_Mistral_7b_Chat_V1,
125
  chatbot=chatbot,
126
  fill_height=True,
127
  additional_inputs_accordion=gr.Accordion(label="⚙️ Parameters", open=False, render=False),
 
152
  gr.Markdown(LICENSE)
153
 
154
  if __name__ == "__main__":
155
+ demo.launch(auth=auth_credentials, share=True)
156