fullstuckdev committed
Commit bb971ee · 1 Parent(s): 160e363

add open AI

Files changed (1):
  1. app.py +18 -10
app.py CHANGED
@@ -1,20 +1,28 @@
 import gradio as gr
 import os
-import requests
+from openai import OpenAI
 
 API_URL = "https://api-inference.huggingface.co/models/nvidia/Llama-3.1-Nemotron-70B-Instruct-HF"
 headers = {"Authorization": f"Bearer {os.getenv('HUGGINGFACE_API_KEY')}"}
 
 async def generate_response(user_input):
-    payload = {
-        "model": "nvidia/Llama-3.1-Nemotron-70B-Instruct-HF",
-        "messages": [{"role": "user", "content": user_input}],
-        "max_tokens": 16384,
-        "max_completion_tokens": 16384
-    }
-
-    response = requests.post(API_URL, headers=headers, json=payload)
-    return response.json()[0]['generated_text']
+    client = OpenAI(
+        base_url="https://api-inference.huggingface.co/v1/",
+        api_key=os.getenv('HUGGINGFACE_API_KEY')
+    )
+
+    messages = [
+        {"role": "user", "content": user_input}
+    ]
+
+    response = client.chat.completions.create(
+        model="meta-llama/Llama-3.1-70B-Instruct",
+        messages=messages,
+        max_tokens=500,
+        stream=False
+    )
+
+    return response.choices[0].message.content  # the v1 SDK returns a typed object: attribute access, not dict subscripting
 
 demo = gr.Interface(
     fn=generate_response,
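
One thing the changed function leaves open: generate_response is declared async, but the synchronous OpenAI client blocks the event loop while the request is in flight, and the API_URL/headers module globals above the function are now unused. Below is a minimal sketch of an async-consistent version using the v1 SDK's AsyncOpenAI client. The inputs/outputs arguments to gr.Interface and the demo.launch() call are assumptions, since the diff is truncated before the end of the file.

import os

import gradio as gr
from openai import AsyncOpenAI

# One client per process; the Hugging Face Inference endpoint exposes an
# OpenAI-compatible chat completions route under /v1/.
client = AsyncOpenAI(
    base_url="https://api-inference.huggingface.co/v1/",
    api_key=os.getenv("HUGGINGFACE_API_KEY"),
)

async def generate_response(user_input):
    # Awaiting the coroutine keeps Gradio's event loop free while the
    # request is in flight; Gradio accepts async functions as fn.
    response = await client.chat.completions.create(
        model="meta-llama/Llama-3.1-70B-Instruct",
        messages=[{"role": "user", "content": user_input}],
        max_tokens=500,
    )
    return response.choices[0].message.content

# inputs/outputs are assumed here; the commit's diff ends before these lines.
demo = gr.Interface(fn=generate_response, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()

Moving the client to module scope also avoids rebuilding an HTTP connection pool on every request, which the committed version does by constructing OpenAI(...) inside the handler.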