Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -134,15 +134,50 @@ def enhanced_ai_scrape(input_method, url, html, prompts_str, selector, page_pos,
|
|
134 |
except Exception as e:
|
135 |
return error_response(f"Unexpected error: {str(e)}")
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
# ----------------- Gradio UI ------------------
|
138 |
|
139 |
with gr.Blocks() as demo:
|
140 |
gr.Markdown("""
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
145 |
-
|
|
|
146 |
""")
|
147 |
|
148 |
with gr.Row():
|
@@ -225,7 +260,7 @@ with gr.Blocks() as demo:
|
|
225 |
|
226 |
# Rate limit status
|
227 |
gr.Markdown("#### Rate Limit Status")
|
228 |
-
rate_limit_status = gr.JSON(label="Current Usage"
|
229 |
refresh_status_btn = gr.Button("Refresh Status", size="sm")
|
230 |
|
231 |
scrape_btn = gr.Button("Scrape with AI", variant="primary")
|
@@ -248,7 +283,24 @@ with gr.Blocks() as demo:
|
|
248 |
scrape_btn.click(
|
249 |
enhanced_ai_scrape,
|
250 |
inputs=[input_method_scraper, url_scraper, html_content, element_prompts, root_selector, page_position],
|
251 |
-
outputs=[scrape_status, context_output, selectors_output, detailed_data, links_data, pagination_info]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
)
|
253 |
|
254 |
demo.launch()
|
|
|
134 |
except Exception as e:
|
135 |
return error_response(f"Unexpected error: {str(e)}")
|
136 |
|
137 |
+
|
138 |
+
def get_rate_limit_status(request: gr.Request):
    """Report the caller's current rate-limit usage without consuming quota.

    Args:
        request: The Gradio request for the current event; the caller's IP is
            resolved from it via ``get_real_ip``.

    Returns:
        A dict for the status JSON panel. ``{"error": ...}`` when no request
        is available; otherwise ``status`` ("Rate limited" or "Available"),
        ``current_requests``, ``max_requests`` and ``time_window_minutes``,
        plus ``time_remaining_minutes`` when limited or ``remaining_requests``
        when not.
    """
    if not request:
        return {"error": "Unable to get request info"}

    ip = get_real_ip(request)
    now = time.time()

    # Keep only timestamps still inside the window. Reading with .get() means
    # a first-time caller neither raises KeyError (plain dict) nor has an
    # entry created merely by looking (defaultdict).
    recent = [t for t in request_times.get(ip, ()) if now - t < TIME_WINDOW]
    if recent:
        request_times[ip] = recent
    else:
        # Status is polled on page load and on refresh; do not keep an empty
        # list around for every visitor who never made a scrape request.
        request_times.pop(ip, None)

    current_requests = len(recent)
    time_window_minutes = round(TIME_WINDOW / 60, 1)

    if current_requests >= MAX_REQUESTS:
        # The oldest surviving timestamp decides when the next slot frees up.
        time_remaining = int(TIME_WINDOW - (now - recent[0]))
        time_remaining_minutes = round(time_remaining / 60, 1)
        return {
            "status": "Rate limited",
            "current_requests": current_requests,
            "max_requests": MAX_REQUESTS,
            "time_window_minutes": time_window_minutes,
            "time_remaining_minutes": time_remaining_minutes,
        }

    return {
        "status": "Available",
        "current_requests": current_requests,
        "max_requests": MAX_REQUESTS,
        "time_window_minutes": time_window_minutes,
        "remaining_requests": MAX_REQUESTS - current_requests,
    }
|
170 |
+
|
171 |
# ----------------- Gradio UI ------------------
|
172 |
|
173 |
with gr.Blocks() as demo:
|
174 |
gr.Markdown("""
|
175 |
+
<div style='text-align: center; margin-bottom: 24px;'>
|
176 |
+
<h1 style='font-size:2.2em; margin-bottom: 0.2em;'>🧩 AI Scraper</h1>
|
177 |
+
<p style='font-size:1.2em; margin-top: 0;'>Extract structured data from web pages with advanced AI models.</p>
|
178 |
+
<p style='font-size:1em; margin-top: 0.5em;'>For more details and API usage, see the <a href='https://jigsawstack.com/docs/api-reference/ai/scrape' target='_blank'>documentation</a>.</p>
|
179 |
+
<p style='font-size:0.9em; margin-top: 0.5em; color: #666;'>Rate limit: 1 request per hour per IP address</p>
|
180 |
+
</div>
|
181 |
""")
|
182 |
|
183 |
with gr.Row():
|
|
|
260 |
|
261 |
# Rate limit status
|
262 |
gr.Markdown("#### Rate Limit Status")
|
263 |
+
rate_limit_status = gr.JSON(label="Current Usage")
|
264 |
refresh_status_btn = gr.Button("Refresh Status", size="sm")
|
265 |
|
266 |
scrape_btn = gr.Button("Scrape with AI", variant="primary")
|
|
|
283 |
scrape_btn.click(
|
284 |
enhanced_ai_scrape,
|
285 |
inputs=[input_method_scraper, url_scraper, html_content, element_prompts, root_selector, page_position],
|
286 |
+
outputs=[scrape_status, context_output, selectors_output, detailed_data, links_data, pagination_info],
|
287 |
+
_js="() => []"
|
288 |
+
)
|
289 |
+
|
290 |
+
# Rate limit status handlers
|
291 |
+
refresh_status_btn.click(
|
292 |
+
get_rate_limit_status,
|
293 |
+
inputs=[],
|
294 |
+
outputs=rate_limit_status,
|
295 |
+
_js="() => []"
|
296 |
+
)
|
297 |
+
|
298 |
+
# Auto-refresh rate limit status when page loads
|
299 |
+
demo.load(
|
300 |
+
get_rate_limit_status,
|
301 |
+
inputs=[],
|
302 |
+
outputs=rate_limit_status,
|
303 |
+
_js="() => []"
|
304 |
)
|
305 |
|
306 |
demo.launch()
|