Spaces:

pi19404
/

shieldgemma-demo

Sleeping

App Files Files Community

3v324v23 committed on Sep 5, 2024

Commit

af44f8f

1 Parent(s): 6168b0d

rate limiting changes

Browse files

Files changed (1) hide show

app.py +55 -6

app.py CHANGED Viewed

@@ -32,13 +32,61 @@ from gradio_client import Client
 import json
 import threading
 import os
 API_TOKEN=os.getenv("API_TOKEN")
 lock = threading.Lock()
-client = Client("pi19404/ai-worker",hf_token=API_TOKEN)
-def my_inference_function(input_data, output_data,mode, max_length, max_new_tokens, model_size):
     """
     The main inference function to process input data and return results.
@@ -78,8 +126,9 @@ def my_inference_function(input_data, output_data,mode, max_length, max_new_toke
 with gr.Blocks() as demo:
     gr.Markdown("## LLM Safety Evaluation")
     with gr.Tab("ShieldGemma2"):
         input_text = gr.Textbox(label="Input Text")
         output_text = gr.Textbox(
             label="Response Text",
@@ -100,7 +149,7 @@ with gr.Blocks() as demo:
             elem_classes=["wrap-text"]
         )
         text_button = gr.Button("Submit")
-        text_button.click(fn=my_inference_function, inputs=[input_text, output_text, mode_input, max_length_input, max_new_tokens_input, model_size_input], outputs=response_text)
     # with gr.Tab("API Input"):
     #     api_input = gr.JSON(label="Input JSON")
@@ -112,7 +161,7 @@ with gr.Blocks() as demo:
     #     api_button = gr.Button("Submit")
     #     api_button.click(fn=my_inference_function, inputs=[api_input, api_output,mode_input_api, max_length_input_api, max_new_tokens_input_api, model_size_input_api], outputs=api_output)
-demo.launch(share=True)

 import json
 import threading
 import os
+from collections import OrderedDict
 API_TOKEN=os.getenv("API_TOKEN")
 lock = threading.Lock()
+#client = Client("pi19404/ai-worker",hf_token=API_TOKEN)
+# Create an OrderedDict to store clients, limited to 15 entries
+client_cache = OrderedDict()
+MAX_CACHE_SIZE = 15
+default_client=Client("pi19404/ai-worker", hf_token=API_TOKEN)
+def get_client_for_ip(ip_address,x_ip_token):
+    if x_ip_token is None:
+        x_ip_token=ip_address
+    #print("ipaddress is ",x_ip_token)
+    if x_ip_token is None:
+        new_client=default_client
+    else:
+        if x_ip_token in client_cache:
+            # Move the accessed item to the end (most recently used)
+            client_cache.move_to_end(x_ip_token)
+            return client_cache[x_ip_token]
+        # Create a new client
+        new_client = Client("pi19404/ai-worker", hf_token=API_TOKEN, headers={"X-IP-Token": x_ip_token})
+        # Add to cache, removing oldest if necessary
+        if len(client_cache) >= MAX_CACHE_SIZE:
+            client_cache.popitem(last=False)
+        client_cache[x_ip_token] = new_client
+    return new_client
+def set_client_for_session(request: gr.Request):
+    # Collect all headers in a dictionary
+    all_headers = {header: value for header, value in request.headers.items()}
+    # Print headers to console
+    print("All request headers:")
+    print(json.dumps(all_headers, indent=2))
+    x_ip_token = request.headers.get('x-ip-token',None)
+    ip_address = request.client.host
+    print("ip address is ",ip_address)
+    client = get_client_for_ip(ip_address,x_ip_token)
+    # Return both the client and the headers
+    return client, json.dumps(all_headers, indent=2)
+    # The "gradio/text-to-image" space is a ZeroGPU space
+def my_inference_function(client,input_data, output_data,mode, max_length, max_new_tokens, model_size):
     """
     The main inference function to process input data and return results.
 with gr.Blocks() as demo:
     gr.Markdown("## LLM Safety Evaluation")
+    client = gr.State()
     with gr.Tab("ShieldGemma2"):
         input_text = gr.Textbox(label="Input Text")
         output_text = gr.Textbox(
             label="Response Text",
             elem_classes=["wrap-text"]
         )
         text_button = gr.Button("Submit")
+        text_button.click(fn=my_inference_function, inputs=[client,input_text, output_text, mode_input, max_length_input, max_new_tokens_input, model_size_input], outputs=response_text)
     # with gr.Tab("API Input"):
     #     api_input = gr.JSON(label="Input JSON")
     #     api_button = gr.Button("Submit")
     #     api_button.click(fn=my_inference_function, inputs=[api_input, api_output,mode_input_api, max_length_input_api, max_new_tokens_input_api, model_size_input_api], outputs=api_output)
+    demo.load(set_client_for_session,None,client)
+demo.launch(share=True)