vineet124jig committed on
Commit
dad171a
·
verified ·
1 Parent(s): 21b6650

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +153 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,153 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import requests
3
+ import json
4
+ import os
5
+ import time
6
+ from collections import defaultdict
7
+
8
# Root of the JigsawStack REST API; endpoint paths are appended to it.
BASE_URL = "https://api.jigsawstack.com/v1"

# Headers sent with every API call. The key is read from the environment once,
# at import time, and is None when JIGSAWSTACK_API_KEY is not set.
headers = {"x-api-key": os.environ.get("JIGSAWSTACK_API_KEY")}

# --- Rate limiting -----------------------------------------------------------
# Maps a client IP to the timestamps (time.time() values) of its recent calls.
request_times = defaultdict(list)
MAX_REQUESTS = 10  # Maximum requests per time window
TIME_WINDOW = 60  # Time window in seconds
17
+
18
def check_rate_limit(request: gr.Request):
    """Check whether the current request is within the per-IP rate limit.

    Each client IP may make at most ``MAX_REQUESTS`` calls within any
    ``TIME_WINDOW``-second span. Timestamps of accepted calls are kept in the
    module-level ``request_times`` mapping.

    Args:
        request: The Gradio request for the current call. May be ``None``.

    Returns:
        A ``(allowed, message)`` tuple. ``allowed`` is ``False`` only when the
        limit has been reached, in which case ``message`` tells the user how
        long to wait. When the client cannot be identified the call is
        allowed and ``message`` carries a diagnostic note.
    """
    # Fail open when there is nothing to key the limit on. ``client`` can be
    # missing even when a request object exists, and dereferencing it blindly
    # would raise AttributeError.
    client = getattr(request, "client", None) if request else None
    if client is None:
        return True, "Rate limit check failed - no request info"

    # NOTE(review): behind a reverse proxy this may be the proxy's address
    # rather than the end user's - confirm for the deployment target.
    ip = client.host
    now = time.time()

    # Drop timestamps that have aged out of the window.
    recent = [t for t in request_times[ip] if now - t < TIME_WINDOW]
    request_times[ip] = recent

    if len(recent) >= MAX_REQUESTS:
        # A slot frees up when the oldest timestamp leaves the window. Clamp
        # to 1 so the message never says "Try again in 0 seconds".
        time_remaining = max(1, int(TIME_WINDOW - (now - recent[0])))
        return False, f"Rate limit exceeded. You can make {MAX_REQUESTS} requests per {TIME_WINDOW} seconds. Try again in {time_remaining} seconds."

    # Record this call only once it has been accepted.
    recent.append(now)
    return True, ""
37
+
38
def generate_embedding(input_type, text_content, url, content_type, token_overflow_mode, request: gr.Request):
    """Generate an embedding through the JigsawStack Embedding API.

    Args:
        input_type: ``"Text"`` to embed ``text_content`` or ``"URL"`` to embed
            the resource at ``url``.
        text_content: Raw text to embed (used when ``input_type`` is "Text").
        url: Resource URL to embed (used when ``input_type`` is "URL").
        content_type: Value sent as the API's ``type`` field.
        token_overflow_mode: Value sent as the API's ``token_overflow_mode``
            field.
        request: The Gradio request, used for per-IP rate limiting.

    Returns:
        A ``(status_message, embedding_json)`` tuple. ``embedding_json`` is an
        indented JSON string on success and ``""`` on any failure; failures
        are reported through ``status_message`` rather than raised.
    """
    # Check rate limit first
    rate_limit_ok, rate_limit_msg = check_rate_limit(request)
    if not rate_limit_ok:
        return rate_limit_msg, ""

    # Normalise once, before validating, so whitespace-only input is rejected
    # here instead of being stripped to an empty value and sent to the API.
    text_value = text_content.strip() if isinstance(text_content, str) else ""
    url_value = url.strip() if isinstance(url, str) else ""

    if input_type == "Text" and not text_value:
        return "Error: Please provide text content.", ""
    if input_type == "URL" and not url_value:
        return "Error: Please provide a URL.", ""

    try:
        payload = {
            "type": content_type,
            "token_overflow_mode": token_overflow_mode,
        }

        if input_type == "Text":
            payload["text"] = text_value
        elif input_type == "URL":
            payload["url"] = url_value

        # requests waits indefinitely by default; bound the call so a stalled
        # upstream cannot hang the worker. A timeout is raised as a
        # RequestException subclass and reported by the handler below.
        response = requests.post(
            f"{BASE_URL}/embedding",
            headers=headers,
            json=payload,
            timeout=60,
        )
        response.raise_for_status()
        result = response.json()

        if not result.get("success"):
            error_msg = f"Error: API call failed - {result.get('message', 'Unknown error')}"
            return error_msg, ""

        embedding = result.get("embeddings", [])
        embedding_str = json.dumps(embedding, indent=2)
        return "Embedding generated successfully!", embedding_str
    except requests.exceptions.RequestException as e:
        return f"Request failed: {str(e)}", ""
    except Exception as e:
        # UI boundary: surface anything unexpected as a status message rather
        # than letting it propagate out of the handler.
        return f"An unexpected error occurred: {str(e)}", ""
82
+
83
# Gradio UI: inputs on the left, status and embedding output on the right.
with gr.Blocks() as demo:
    # f-string so the rate-limit line shows the configured values instead of
    # the literal "{MAX_REQUESTS}" / "{TIME_WINDOW}" placeholders.
    gr.Markdown(f"""
    <div style='text-align: center; margin-bottom: 24px;'>
        <h1 style='font-size:2.2em; margin-bottom: 0.2em;'>Vector Embedding Generator</h1>
        <p style='font-size:1.2em; margin-top: 0;'>Generate vector embeddings from various content types including text, images, audio, and PDF files.</p>
        <p style='font-size:1em; margin-top: 0.5em;'>Supported types: text, text-other, image, audio, pdf</p>
        <p style='font-size:0.9em; margin-top: 0.5em; color: #666;'>Rate limit: {MAX_REQUESTS} requests per {TIME_WINDOW} seconds per IP</p>
    </div>
    """)

    with gr.Row():
        with gr.Column():
            gr.Markdown("#### Content Input")
            input_type = gr.Radio([
                "Text",
                "URL"
            ], value="Text", label="Select Input Type")

            # Only one of these two boxes is visible at a time; see
            # toggle_inputs below.
            text_content = gr.Textbox(
                label="Text Content",
                placeholder="Enter the text content to generate embeddings for...",
                visible=True,
                lines=5
            )
            url = gr.Textbox(
                label="URL",
                placeholder="Enter the URL of the resource...",
                visible=False
            )

            content_type = gr.Dropdown(
                choices=["text", "text-other", "image", "audio", "pdf"],
                value="text",
                label="Content Type",
                info="Select the type of content being processed"
            )

            token_overflow_mode = gr.Radio(
                choices=["error", "truncate"],
                value="error",
                label="Token Overflow Mode",
                info="How to handle input that exceeds token limits"
            )

            generate_btn = gr.Button("Generate Embedding", variant="primary")

        with gr.Column():
            gr.Markdown("#### Embedding Result")
            status_message = gr.Textbox(label="Status", interactive=False)
            embedding_result = gr.Textbox(
                label="Vector Embedding",
                interactive=False,
                lines=15,
                max_lines=25
            )

    def toggle_inputs(selected):
        """Show the text box for "Text" input and the URL box otherwise."""
        if selected == "Text":
            return gr.update(visible=True), gr.update(visible=False)
        else:  # URL
            return gr.update(visible=False), gr.update(visible=True)

    input_type.change(toggle_inputs, inputs=[input_type], outputs=[text_content, url])

    # The gr.Request argument of generate_embedding is not listed in `inputs`.
    generate_btn.click(
        generate_embedding,
        inputs=[input_type, text_content, url, content_type, token_overflow_mode],
        outputs=[status_message, embedding_result]
    )

demo.launch()
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ gradio
2
+ requests
3
+ Pillow