3v324v23 committed on
Commit
6860b2d
·
1 Parent(s): 7d96495

adding retries for the Space to wake up any downstream Space

Browse files
Files changed (1) hide show
  1. app.py +86 -38
app.py CHANGED
@@ -41,7 +41,91 @@ lock = threading.Lock()
41
  # Create an OrderedDict to store clients, limited to 15 entries
42
  client_cache = OrderedDict()
43
  MAX_CACHE_SIZE = 15
44
- default_client=Client("pi19404/ai-worker", hf_token=API_TOKEN)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  def get_client_for_ip(ip_address,x_ip_token):
46
  """
47
  Retrieve or create a client for the given IP address.
@@ -118,43 +202,7 @@ def set_client_for_session(request: gr.Request):
118
  # The "gradio/text-to-image" space is a ZeroGPU space
119
 
120
 
121
- def my_inference_function(client,input_data, output_data,mode, max_length, max_new_tokens, model_size):
122
- """
123
- The main inference function to process input data and return results.
124
-
125
- Args:
126
- input_data (str or dict): The input data in JSON format.
127
- mode (str): The mode of operation ("scoring" or "generative").
128
- max_length (int): The maximum length of the input prompt.
129
- max_new_tokens (int): The maximum number of new tokens to generate.
130
- model_size (str): The size of the model to be used.
131
-
132
- Returns:
133
- str: The output data in JSON format.
134
- """
135
- with lock:
136
- try:
137
-
138
 
139
-
140
- result = client.predict(
141
- input_data=input_data,
142
- output_data=output_data,
143
- mode=mode,
144
- max_length=max_length,
145
- max_new_tokens=max_new_tokens,
146
- model_size=model_size,
147
- api_name="/my_inference_function"
148
- )
149
- print(result)
150
- print("entering return",result)
151
- return result # Pretty-print the JSON
152
- except json.JSONDecodeError:
153
- return json.dumps({"error": "Invalid JSON input"})
154
- except KeyError:
155
- return json.dumps({"error": "Missing 'input' key in JSON"})
156
- except ValueError as e:
157
- return json.dumps({"error": str(e)})
158
 
159
  with gr.Blocks() as demo:
160
  """
@@ -166,7 +214,7 @@ with gr.Blocks() as demo:
166
  - Other UI components (not shown in this snippet).
167
  - A load event that calls set_client_for_session when the interface is loaded.
168
  """
169
-
170
  gr.Markdown("## LLM Safety Evaluation")
171
  client = gr.State()
172
  with gr.Tab("ShieldGemma2"):
 
41
  # Create an OrderedDict to store clients, limited to 15 entries
42
  client_cache = OrderedDict()
43
  MAX_CACHE_SIZE = 15
44
+
45
+
46
def my_inference_function(client, input_data, output_data, mode, max_length, max_new_tokens, model_size):
    """
    Run one inference call against the downstream Space and return its result.

    Args:
        client: Either a gradio ``Client`` instance, or a single-element
            list/tuple wrapping one (the form the UI stores in ``gr.State``).
        input_data (str or dict): The input data in JSON format.
        output_data (str): Output data forwarded to the Space's API endpoint.
        mode (str): The mode of operation ("scoring" or "generative").
        max_length (int): The maximum length of the input prompt.
        max_new_tokens (int): The maximum number of new tokens to generate.
        model_size (str): The size of the model to be used.

    Returns:
        str: The output data in JSON format, or a JSON-encoded error object
        when the call fails with one of the handled exceptions.
    """
    with lock:  # serialize calls to the shared downstream Space
        try:
            # Accept both a bare Client and the [Client] wrapper; previously
            # only the wrapped form worked (``client[0]`` raised TypeError
            # when a bare Client was passed, e.g. from the wake-up helper).
            target = client[0] if isinstance(client, (list, tuple)) else client

            result = target.predict(
                input_data=input_data,
                output_data=output_data,
                mode=mode,
                max_length=max_length,
                max_new_tokens=max_new_tokens,
                model_size=model_size,
                api_name="/my_inference_function"
            )
            print(result)
            print("entering return", result)
            return result
        except json.JSONDecodeError:
            return json.dumps({"error": "Invalid JSON input"})
        except KeyError:
            return json.dumps({"error": "Missing 'input' key in JSON"})
        except ValueError as e:
            return json.dumps({"error": str(e)})
83
+
84
+
85
def wake_up_space_with_retries(space_url, token, retries=5, wait_time=10):
    """
    Attempt to wake up the Hugging Face Space, retrying on failure.

    Retries a number of times in case of a delay while the Space wakes up.

    :param space_url: The "user/space" id or URL of the Hugging Face Space.
    :param token: The Hugging Face API token.
    :param retries: Number of attempts before giving up.
    :param wait_time: Time to wait between retries (in seconds).
    :return: A connected ``Client`` on success, or ``None`` after all
        attempts fail.
    """
    for attempt in range(retries):
        try:
            print(f"Attempt {attempt + 1} to wake up the Space...")

            # Initialize the Gradio Client with a 30-second timeout.
            # NOTE(review): confirm this gradio_client version accepts a
            # ``timeout`` keyword — older releases do not.
            client = Client(space_url, hf_token=token, timeout=httpx.Timeout(30.0))

            # Lightweight probe call to force the Space awake.
            # Fix: ``scoring`` was previously an undefined bare name
            # (NameError at call time); the mode must be the string
            # "scoring".  The client is wrapped in a list because
            # my_inference_function indexes its first argument.
            my_inference_function([client], "test input", "", "scoring", 10, 10, "2B")

            print("Space is awake and ready!")
            return client

        except httpx.ReadTimeout:
            print(f"Request timed out on attempt {attempt + 1}. Retrying in {wait_time} seconds...")

        except Exception as e:
            print(f"An error occurred on attempt {attempt + 1}: {e}")

        # Wait before retrying.  Fix: the original printed this message
        # but never actually slept for non-timeout errors.
        if attempt < retries - 1:
            print(f"Waiting for {wait_time} seconds before retrying...")
            time.sleep(wait_time)

    print("Space is still not active after multiple attempts.")
    return None
123
# Shared default client, created once at import time.  The retrying wake-up
# helper is used so that a sleeping downstream Space does not break startup.
default_client = wake_up_space_with_retries("pi19404/ai-worker", API_TOKEN)
129
  def get_client_for_ip(ip_address,x_ip_token):
130
  """
131
  Retrieve or create a client for the given IP address.
 
202
  # The "gradio/text-to-image" space is a ZeroGPU space
203
 
204
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
205
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
206
 
207
  with gr.Blocks() as demo:
208
  """
 
214
  - Other UI components (not shown in this snippet).
215
  - A load event that calls set_client_for_session when the interface is loaded.
216
  """
217
+
218
  gr.Markdown("## LLM Safety Evaluation")
219
  client = gr.State()
220
  with gr.Tab("ShieldGemma2"):