Update tabbed.py
tabbed.py CHANGED
@@ -1,6 +1,7 @@
 import gradio as gr
 import yaml
-from huggingface_hub import hf_hub_download
+import spaces
+from huggingface_hub import hf_hub_download, SpaceVariable
 from huggingface_hub.utils import LocalEntryNotFoundError
 from llama_cpp import Llama
 
@@ -24,6 +25,10 @@ while True:
 
 llm = Llama(model_path=fp, **config["llama_cpp"])
 
+_space_name = os.getenv('HF_SPACE')
+space_name = SpaceVariable("MODEL_REPO_ID").value
+base_url = f"https://huggingface.co/spaces/{space_name}/blob/main/config.yml"
+duplicate_url = f"https://huggingface.co/spaces/{space_name}?duplicate=true"
 
 def user(message, history):
     history = history or []
@@ -31,7 +36,7 @@ def user(message, history):
     history.append([message, ""])
     return "", history
 
-
+@spaces.GPU
 def chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
     history = history or []
 
@@ -59,7 +64,7 @@ def chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_
         # stream the response
         yield history, history
 
-
+@spaces.GPU
 def rp_chat(history, system_message, max_tokens, temperature, top_p, top_k, repeat_penalty):
     history = history or []
 
@@ -119,7 +124,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         with gr.Column():
            gr.Markdown(f"""
-            ### This is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) quantized model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
+            ### This {_space_name} & {space_name} is the [{config["hub"]["repo_id"]}](https://huggingface.co/{config["hub"]["repo_id"]}) quantized model file [{config["hub"]["filename"]}](https://huggingface.co/{config["hub"]["repo_id"]}/blob/main/{config["hub"]["filename"]})
 
             <details>
             <summary><a href="https://huggingface.co/spaces/Nekochu/Llama-2-13B-novel17-french-GGUF?duplicate=true">Duplicate the Space</a> to skip the queue and run in a private space or to use your own GGUF models, simply update the <a href="https://huggingface.co/spaces/Nekochu/Llama-2-13B-novel17-french-GGUF/blob/main/config.yml">config.yml</a></summary>
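
Below is a minimal, self-contained sketch of the pattern this commit introduces: a ZeroGPU-style `@spaces.GPU` decorator around the inference functions, plus config and duplicate links derived from the Space's name. The repo id, filename, and fallback space name are hypothetical placeholders, not the values in this Space's config.yml; the `HF_SPACE` variable and the `SpaceVariable("MODEL_REPO_ID")` lookup come from the diff and are assumed to be configured on the Space, and the diff further assumes `os` is imported elsewhere in tabbed.py.

# Sketch only: assumes a ZeroGPU Space where the `spaces` package is available
# and a GGUF model is served with llama-cpp-python. Placeholder values marked below.
import os

import spaces                                   # ZeroGPU decorator package on HF Spaces
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

repo_id = "your-user/your-model-GGUF"           # placeholder model repo
filename = "model.Q4_K_M.gguf"                  # placeholder quantized file

# Download the quantized file and load it, as the unchanged part of tabbed.py does.
fp = hf_hub_download(repo_id=repo_id, filename=filename)
llm = Llama(model_path=fp)

# The commit derives self-referencing links from the Space's name so a duplicated
# Space points at its own config.yml automatically.
space_name = os.getenv("HF_SPACE", "owner/space-name")   # assumed Space variable
base_url = f"https://huggingface.co/spaces/{space_name}/blob/main/config.yml"
duplicate_url = f"https://huggingface.co/spaces/{space_name}?duplicate=true"

@spaces.GPU  # claim a GPU slice only while this function runs, as the commit does for chat/rp_chat
def chat(prompt: str, max_tokens: int = 256) -> str:
    out = llm(prompt, max_tokens=max_tokens)
    return out["choices"][0]["text"]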