mostafa-sh committed on
Commit
8f7ca1c
·
1 Parent(s): bedc20e

update endpoint

Browse files
Files changed (3) hide show
  1. .gitignore +3 -1
  2. app.py +1 -1
  3. utils/endpoint_utils.py +9 -9
.gitignore CHANGED
@@ -2,4 +2,6 @@ __pycache__/
2
  .devcontainer/
3
  .streamlit/
4
  .env
5
- private_data/
 
 
 
2
  .devcontainer/
3
  .streamlit/
4
  .env
5
+ private_data/
6
+ app_script.py
7
+ test.py
app.py CHANGED
@@ -292,7 +292,7 @@ if submit_button_placeholder.button("AI Answer", type="primary"):
292
  ]
293
 
294
  raw_synthesis_answer = get_inference_endpoint_response(
295
- model="deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
296
  messages=messages,
297
  temperature=synthesis_deepseek_temperature,
298
  top_p=synthesis_deepseek_top_p,
 
292
  ]
293
 
294
  raw_synthesis_answer = get_inference_endpoint_response(
295
+ model="tgi",#"deepseek-ai/DeepSeek-R1-0528-Qwen3-8B",
296
  messages=messages,
297
  temperature=synthesis_deepseek_temperature,
298
  top_p=synthesis_deepseek_top_p,
utils/endpoint_utils.py CHANGED
@@ -5,6 +5,7 @@ import re
5
  import json
6
 
7
  from huggingface_hub import InferenceClient
 
8
 
9
  # Helper function to parse the response
10
  def parse_thinking_response(response_text):
@@ -31,24 +32,23 @@ def get_inference_endpoint_response(
31
  """
32
  Serverless API (Pay-as-you-go)
33
  """
34
- client = InferenceClient(
35
- provider="auto",
36
- api_key=os.getenv("HF_API_KEY")
37
  )
38
 
39
- completion = client.chat.completions.create(
 
40
  model=model,
41
  messages=messages,
42
  temperature=temperature,
43
  top_p=top_p,
44
- max_tokens=max_tokens
 
45
  )
46
 
47
  # Get the raw response content
48
- raw_response = completion.choices[0].message.content
49
-
50
- return raw_response
51
-
52
 
53
  def get_custom_inference_endpoint_response(
54
  messages: list,
 
5
  import json
6
 
7
  from huggingface_hub import InferenceClient
8
+ from openai import OpenAI
9
 
10
  # Helper function to parse the response
11
  def parse_thinking_response(response_text):
 
32
  """
33
  Serverless API (Pay-as-you-go)
34
  """
35
+ client = OpenAI(
36
+ base_url = "https://w98rfd84portge8q.us-east-1.aws.endpoints.huggingface.cloud/v1/",
37
+ api_key = os.getenv("HF_ENDPOINT_TOKEN")
38
  )
39
 
40
+ # print(messages)
41
+ chat_completion = client.chat.completions.create(
42
  model=model,
43
  messages=messages,
44
  temperature=temperature,
45
  top_p=top_p,
46
+ max_tokens=max_tokens,
47
+ stream=False
48
  )
49
 
50
  # Get the raw response content
51
+ return chat_completion.choices[0].message.content
 
 
 
52
 
53
  def get_custom_inference_endpoint_response(
54
  messages: list,