vineet124jig committed on
Commit
3612cb3
·
verified ·
1 Parent(s): e1e5181

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +60 -4
app.py CHANGED
@@ -1,14 +1,60 @@
1
  import gradio as gr
2
  import requests
 
 
 
 
3
  from PIL import Image
4
  import io
5
- import os
 
6
  BASE_URL = "https://api.jigsawstack.com/v1"
7
- headers = {"x-api-key": os.getenv("JIGSAWSTACK_API_KEY")}
 
 
8
 
9
- # ----------------- JigsawStack API Wrappers ------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
10
 
11
- def enhanced_ai_scrape(input_method, url, html, prompts_str, selector, page_pos):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  def error_response(message):
13
  return (
14
  message,
@@ -19,6 +65,11 @@ def enhanced_ai_scrape(input_method, url, html, prompts_str, selector, page_pos)
19
  gr.update(visible=False),
20
  )
21
 
 
 
 
 
 
22
  try:
23
  # Validate element prompts
24
  prompts = [p.strip() for p in prompts_str.split(",") if p.strip()]
@@ -172,6 +223,11 @@ with gr.Blocks() as demo:
172
  visible=False
173
  )
174
 
 
 
 
 
 
175
  scrape_btn = gr.Button("Scrape with AI", variant="primary")
176
 
177
  # Function to show/hide input groups based on selection
 
1
  import gradio as gr
2
  import requests
3
+ import json
4
+ import os
5
+ import time
6
+ from collections import defaultdict
7
  from PIL import Image
8
  import io
9
+
10
+
11
  BASE_URL = "https://api.jigsawstack.com/v1"  # JigsawStack API v1 base URL
12
+ headers = {  # presumably sent with every JigsawStack API call; verify against the request code
13
+ "x-api-key": os.getenv("JIGSAWSTACK_API_KEY")  # value is None when the env var is unset
14
+ }
15
 
16
+ # Rate limiting configuration (in-memory, keyed by client IP)
17
+ request_times = defaultdict(list)  # client IP -> time.time() stamps of accepted requests
18
+ MAX_REQUESTS = 1 # Maximum accepted requests per IP within TIME_WINDOW
19
+ TIME_WINDOW = 3600 # Length of the rate-limit window in seconds (1 hour)
20
+
21
def get_real_ip(request: "gr.Request | None") -> str:
    """Return the best-guess client IP address for a request.

    The first entry of the ``x-forwarded-for`` header is preferred. When the
    header is missing, or its first entry is blank, the direct peer address
    (``request.client.host``) is used instead.

    Args:
        request: The request object handed to the event handler, or ``None``
            when no request information is available.

    Returns:
        The client IP as a string, or ``"unknown"`` when it cannot be
        determined (no request, or neither a usable header nor a peer
        address).

    Note:
        ``x-forwarded-for`` is client-controlled unless a trusted proxy
        overwrites it, so the returned value can be spoofed.
        NOTE(review): confirm the hosting proxy sanitises this header before
        relying on it for rate limiting.
    """
    if not request:
        return "unknown"

    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        # The header is a comma-separated chain; the left-most entry is the
        # address reported for the original client.
        ip = forwarded.split(",")[0].strip()
        if ip:
            return ip

    # ``request.client`` may be None (no peer info available), so do not
    # dereference ``.host`` blindly.
    client = getattr(request, "client", None)
    host = getattr(client, "host", None)
    return host or "unknown"
32
 
33
def check_rate_limit(request: gr.Request):
    """Apply the per-IP sliding-window rate limit to *request*.

    Returns an ``(allowed, message)`` tuple. ``allowed`` is ``True`` when the
    call may proceed, in which case the current timestamp is recorded for the
    caller's IP. It is ``False`` when the caller already has ``MAX_REQUESTS``
    recorded calls within the last ``TIME_WINDOW`` seconds; ``message`` then
    explains the limit and how long to wait.

    When no request object is supplied the check cannot identify the caller;
    it lets the call through and returns an explanatory message.
    """
    if not request:
        return True, "Rate limit check failed - no request info"

    client_ip = get_real_ip(request)
    current = time.time()

    # Drop timestamps that have aged out of the window and store the pruned list.
    recent = [stamp for stamp in request_times[client_ip] if current - stamp < TIME_WINDOW]
    request_times[client_ip] = recent

    if len(recent) < MAX_REQUESTS:
        # Still under the cap: record this call and allow it.
        recent.append(current)
        return True, ""

    # The oldest surviving timestamp determines when the next slot frees up.
    seconds_left = int(TIME_WINDOW - (current - recent[0]))
    time_remaining_minutes = round(seconds_left / 60, 1)
    time_window_minutes = round(TIME_WINDOW / 60, 1)
    return False, f"Rate limit exceeded. You can make {MAX_REQUESTS} requests per {time_window_minutes} minutes. Try again in {time_remaining_minutes} minutes."
55
+
56
+
57
+ def enhanced_ai_scrape(input_method, url, html, prompts_str, selector, page_pos, request: gr.Request):
58
  def error_response(message):
59
  return (
60
  message,
 
65
  gr.update(visible=False),
66
  )
67
 
68
+ # Check rate limit first
69
+ rate_limit_ok, rate_limit_msg = check_rate_limit(request)
70
+ if not rate_limit_ok:
71
+ return error_response(f"Rate limit exceeded: {rate_limit_msg}")
72
+
73
  try:
74
  # Validate element prompts
75
  prompts = [p.strip() for p in prompts_str.split(",") if p.strip()]
 
223
  visible=False
224
  )
225
 
226
+ # Rate limit status
227
+ gr.Markdown("#### Rate Limit Status")
228
+ rate_limit_status = gr.JSON(label="Current Usage", interactive=False)
229
+ refresh_status_btn = gr.Button("Refresh Status", size="sm")
230
+
231
  scrape_btn = gr.Button("Scrape with AI", variant="primary")
232
 
233
  # Function to show/hide input groups based on selection