Spaces:

artificialguybr
/

CODELLAMA-34B-FREE-DEMO

Sleeping

App Files Files Community

artificialguybr committed on Mar 8, 2024

Commit

db1061f

1 Parent(s): 62cbda2

Refactor API headers and update NVIDIA API call

Browse files

Files changed (1) hide show

app.py +32 -36

app.py CHANGED Viewed

@@ -3,14 +3,14 @@ import requests
 import json
 import os
-# Definir variáveis de ambiente ou substituir com sua chave de API real
-API_KEY = os.getenv('API_KEY')
 INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/df2bee43-fb69-42b9-9ee5-f4eabbeaf3a8"
 headers = {
     "Authorization": f"Bearer {API_KEY}",
-    "accept": "text/event-stream",
-    "content-type": "application/json",
 }
 BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
@@ -27,45 +27,41 @@ def user(message, history):
     history.append({"role": "user", "content": message})
     return history
-def call_nvidia_api(history, max_tokens, temperature, top_p, seed=42):
     # Preparar o payload com o histórico de chat formatado
-    messages = [{"role": "user" if i % 2 == 0 else "assistant", "content": msg} for i, msg in enumerate(history)]
     payload = {
-        "messages": messages,
-        "temperature": temperature,
-        "top_p": top_p,
-        "max_tokens": max_tokens,
-        "seed": seed,
-        "stream": True
     }
-    response = requests.post(INVOKE_URL, headers=headers, json=payload, stream=True)
-    full_response = ""
-    for line in response.iter_lines():
-        if line:
-            decoded_line = line.decode("utf-8").strip()
-            if decoded_line.startswith("data:"):
-                try:
-                    json_data = json.loads(decoded_line[5:])
-                    # Processar a resposta da API aqui
-                    # Supondo que a resposta da API seja diretamente o texto a ser adicionado ao chat
-                    full_response += json_data.get("content", "")
-                except json.JSONDecodeError:
-                    print(f"Invalid JSON: {decoded_line[5:]}")
-    return full_response
-def chat(history, system_message, max_tokens, temperature, top_p, top_k, repetition_penalty):
     print("Starting chat...")
-    # Chamar a API da NVIDIA aqui com o histórico formatado
-    assistant_response = call_nvidia_api(history, max_tokens, temperature, top_p)
     # Atualizar o histórico com a resposta do assistente
-    if history:
-        history[-1][1] += assistant_response
-    else:
-        history.append(["", assistant_response])
-    return history, history, ""
 # Gradio interface setup
 with gr.Blocks() as demo:
     with gr.Row():

 import json
 import os
+API_KEY = os.getenv('API_KEY')
 INVOKE_URL = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/functions/df2bee43-fb69-42b9-9ee5-f4eabbeaf3a8"
+FETCH_URL_FORMAT = "https://api.nvcf.nvidia.com/v2/nvcf/pexec/status/"
 headers = {
     "Authorization": f"Bearer {API_KEY}",
+    "Accept": "application/json",
+    "Content-Type": "application/json",
 }
 BASE_SYSTEM_MESSAGE = "I carefully provide accurate, factual, thoughtful, nuanced answers and am brilliant at reasoning."
     history.append({"role": "user", "content": message})
     return history
+def call_nvidia_api(history):
     # Preparar o payload com o histórico de chat formatado
     payload = {
+        "messages": history,
+        "temperature": 0.7,
+        "top_p": 0.95,
+        "max_tokens": 500,
+        "seed": 42,
+        "stream": False  # Ajustado para False conforme nova especificação
     }
+    session = requests.Session()
+    response = session.post(INVOKE_URL, headers=headers, json=payload)
+    # Novo método de polling para verificar o status da resposta
+    while response.status_code == 202:
+        request_id = response.headers.get("NVCF-REQID")
+        fetch_url = FETCH_URL_FORMAT + request_id
+        response = session.get(fetch_url, headers=headers)
+    response.raise_for_status()
+    response_body = response.json()
+    # Processar a resposta da API aqui
+    if response_body["choices"]:
+        assistant_message = response_body["choices"][0]["message"]["content"]
+        history.append({"role": "assistant", "content": assistant_message})
+    return history
+def chat(history, system_message):
     print("Starting chat...")
     # Atualizar o histórico com a resposta do assistente
+    updated_history = call_nvidia_api(history)
+    return updated_history, ""
 # Gradio interface setup
 with gr.Blocks() as demo:
     with gr.Row():