Omnibus committed (verified)
Commit 62a62ca · 1 Parent(s): 72b121b

Update app.py

Files changed (1)
  app.py +5 -5
app.py CHANGED
@@ -1,4 +1,4 @@
-from huggingface_hub import InferenceClient
+from huggingface_hub import InferenceClient, AsyncInferenceClient
 import gradio as gr
 import random
 
@@ -44,9 +44,9 @@ MAX_HISTORY=100
 opts=[]
 def generate(prompt, history,max_new_tokens,health,seed,temperature=temperature,top_p=top_p,repetition_penalty=repetition_penalty):
     opts.clear()
-    client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
+    #client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
     #client = InferenceClient("abacusai/Slerp-CM-mist-dpo")
-
+    client = AsyncInferenceClient()
     temperature = float(temperature)
     if temperature < 1e-2:
         temperature = 1e-2
@@ -79,10 +79,10 @@ def generate(prompt, history,max_new_tokens,health,seed,temperature=temperature,
     if cnt > MAX_HISTORY:
         history1 = compress_history(str(history), temperature, top_p, repetition_penalty)
     formatted_prompt = format_prompt(f"{GAME_MASTER.format(history=history1,stats=stats,dice=random.randint(1,10))}, {prompt}", history)
-    stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
+    stream = await client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
     output = ""
 
-    for response in stream:
+    async for response in await stream:
         output += response.token.text
         if history:
             yield [(prompt,output)],stats,None,None
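
For context, a minimal sketch of how a streamed completion from huggingface_hub's AsyncInferenceClient is typically consumed. The model ID is borrowed from the commented-out line above; the prompt, generation parameters, and the stream_reply function name are illustrative, not the app's actual code:

from huggingface_hub import AsyncInferenceClient

# Model ID taken from the commented-out line in the diff; any text-generation
# model served by the Inference API could be substituted here.
client = AsyncInferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")

async def stream_reply(prompt: str):
    # On the async client, text_generation() must be awaited; with stream=True the
    # awaited result is already an async iterator of token events, so it is
    # consumed with a plain `async for` (no second await).
    stream = await client.text_generation(
        prompt,
        max_new_tokens=256,   # illustrative value
        stream=True,
        details=True,
        return_full_text=False,
    )
    output = ""
    async for response in stream:
        output += response.token.text
        yield output          # Gradio accepts async generators for streaming output

# Example driver for local testing:
# import asyncio
# async def main():
#     async for partial in stream_reply("You open the dungeon door..."):
#         print(partial)
# asyncio.run(main())

Because the client is now awaited, the enclosing Gradio event handler has to be declared async def so the event loop can drive the streamed tokens to the UI.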