Spaces:
Building
Building
Commit
·
b4fe9b6
1
Parent(s):
d053923
IT IS WORKIIIIING
Browse files
app.py
CHANGED
@@ -3,15 +3,32 @@ import pandas as pd
|
|
3 |
import gradio as gr
|
4 |
from typing import Dict, Any, Type
|
5 |
from web2json.preprocessor import BasicPreprocessor
|
6 |
-
from web2json.ai_extractor import AIExtractor,LLMClassifierExtractor,NvidiaLLMClient
|
7 |
from web2json.postprocessor import PostProcessor
|
8 |
from web2json.pipeline import Pipeline
|
9 |
from pydantic import BaseModel, Field, create_model
|
10 |
import os
|
11 |
import dotenv
|
|
|
|
|
|
|
12 |
|
13 |
dotenv.load_dotenv()
|
14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
def parse_schema_input(schema_input: str) -> Type[BaseModel]:
|
16 |
"""
|
17 |
Convert user schema input to a Pydantic BaseModel.
|
@@ -170,66 +187,19 @@ def webpage_to_json(content: str, is_url: bool, schema: BaseModel) -> Dict[str,
|
|
170 |
- Preserve the original formatting and context where relevant
|
171 |
- Return the extracted data in the format specified by the schema"""
|
172 |
|
173 |
-
classification_prompt_template =
|
174 |
-
# HTML Chunk Relevance Classification Prompt
|
175 |
-
|
176 |
-
You are an HTML content classifier. Your task is to analyze an HTML chunk against a given schema and determine if the content is relevant.
|
177 |
-
|
178 |
-
## Instructions:
|
179 |
-
1. Carefully examine the provided HTML chunk
|
180 |
-
2. Compare it against the given schema/criteria
|
181 |
-
3. Determine if the HTML chunk contains content that matches or is relevant to the schema
|
182 |
-
4. Respond with ONLY a JSON object containing a single field "relevant" with value 1 (relevant) or 0 (not relevant)
|
183 |
-
|
184 |
-
## Input Format:
|
185 |
-
**Schema/Criteria:**
|
186 |
-
{schema}
|
187 |
-
|
188 |
-
**HTML Chunk:**
|
189 |
-
```html
|
190 |
-
{content}
|
191 |
-
```
|
192 |
-
|
193 |
-
## Output Format:
|
194 |
-
Your response must be ONLY a valid JSON object with no additional text:
|
195 |
-
|
196 |
-
```json
|
197 |
-
{{
|
198 |
-
"relevant": 1
|
199 |
-
}}
|
200 |
-
```
|
201 |
-
|
202 |
-
OR
|
203 |
-
|
204 |
-
```json
|
205 |
-
{{
|
206 |
-
"relevant": 0
|
207 |
-
}}
|
208 |
-
```
|
209 |
-
|
210 |
-
## Classification Rules:
|
211 |
-
- Output 1 if the HTML chunk contains content that matches the schema criteria
|
212 |
-
- Output 0 if the HTML chunk does not contain relevant content
|
213 |
-
- Consider semantic meaning, not just exact keyword matches
|
214 |
-
- Look at text content, attributes, structure, and context
|
215 |
-
- Ignore purely structural HTML elements (like divs, spans) unless they contain relevant content
|
216 |
-
- Be STRICT in your evaluation - only mark as relevant (1) if there is clear, meaningful content that directly relates to the schema
|
217 |
-
- Empty elements, placeholder text, navigation menus, headers/footers, and generic UI components should typically be marked as not relevant (0)
|
218 |
-
- The HTML chunk does not need to contain ALL schema information, but it must contain SUBSTANTIAL and SPECIFIC content related to the schema
|
219 |
-
|
220 |
-
CRITICAL: Your entire response MUST be exactly one JSON object. DO NOT include any explanations, reasoning, markdown formatting, code blocks, or additional text. Output ONLY the raw JSON object.
|
221 |
-
"""
|
222 |
# Initialize pipeline components
|
223 |
# TODO: improve the RAG system and optimize (don't instantiate every time)
|
224 |
preprocessor = BasicPreprocessor(config={'keep_tags': True})
|
225 |
try:
|
226 |
# llm = GeminiLLMClient(config={'api_key': os.getenv('GEMINI_API_KEY')})
|
227 |
llm = NvidiaLLMClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'qwen/qwen2.5-7b-instruct'})
|
|
|
228 |
except Exception as e:
|
229 |
return {"error": f"Failed to initialize LLM client: {str(e)}"}
|
230 |
|
231 |
# ai_extractor = RAGExtractor(llm_client=llm, prompt_template=prompt_template)
|
232 |
-
ai_extractor = LLMClassifierExtractor(llm_client=llm, prompt_template=prompt_template, classifier_prompt=classification_prompt_template)
|
233 |
postprocessor = PostProcessor()
|
234 |
pipeline = Pipeline(preprocessor, ai_extractor, postprocessor)
|
235 |
|
|
|
3 |
import gradio as gr
|
4 |
from typing import Dict, Any, Type
|
5 |
from web2json.preprocessor import BasicPreprocessor
|
6 |
+
from web2json.ai_extractor import AIExtractor,LLMClassifierExtractor,NvidiaLLMClient, NvidiaRerankerClient
|
7 |
from web2json.postprocessor import PostProcessor
|
8 |
from web2json.pipeline import Pipeline
|
9 |
from pydantic import BaseModel, Field, create_model
|
10 |
import os
|
11 |
import dotenv
|
12 |
+
import random
|
13 |
+
import numpy as np
|
14 |
+
import torch
|
15 |
|
16 |
dotenv.load_dotenv()
|
17 |
|
18 |
+
def seed_everything(seed=42):
|
19 |
+
random.seed(seed)
|
20 |
+
np.random.seed(seed)
|
21 |
+
torch.manual_seed(seed)
|
22 |
+
|
23 |
+
if torch.cuda.is_available():
|
24 |
+
torch.cuda.manual_seed(seed)
|
25 |
+
torch.cuda.manual_seed_all(seed) # if using multi-GPU
|
26 |
+
|
27 |
+
torch.backends.cudnn.deterministic = True
|
28 |
+
torch.backends.cudnn.benchmark = False
|
29 |
+
|
30 |
+
seed_everything(22)
|
31 |
+
|
32 |
def parse_schema_input(schema_input: str) -> Type[BaseModel]:
|
33 |
"""
|
34 |
Convert user schema input to a Pydantic BaseModel.
|
|
|
187 |
- Preserve the original formatting and context where relevant
|
188 |
- Return the extracted data in the format specified by the schema"""
|
189 |
|
190 |
+
classification_prompt_template = schema.model_json_schema()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
191 |
# Initialize pipeline components
|
192 |
# TODO: improve the RAG system and optimize (don't instantiate every time)
|
193 |
preprocessor = BasicPreprocessor(config={'keep_tags': True})
|
194 |
try:
|
195 |
# llm = GeminiLLMClient(config={'api_key': os.getenv('GEMINI_API_KEY')})
|
196 |
llm = NvidiaLLMClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'qwen/qwen2.5-7b-instruct'})
|
197 |
+
reranker = NvidiaRerankerClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'nv-rerank-qa-mistral-4b:1'})
|
198 |
except Exception as e:
|
199 |
return {"error": f"Failed to initialize LLM client: {str(e)}"}
|
200 |
|
201 |
# ai_extractor = RAGExtractor(llm_client=llm, prompt_template=prompt_template)
|
202 |
+
ai_extractor = LLMClassifierExtractor(reranker=reranker, llm_client=llm, prompt_template=prompt_template, classifier_prompt=classification_prompt_template)
|
203 |
postprocessor = PostProcessor()
|
204 |
pipeline = Pipeline(preprocessor, ai_extractor, postprocessor)
|
205 |
|
test.ipynb
CHANGED
@@ -2,7 +2,7 @@
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
-
"execution_count":
|
6 |
"id": "5223b1b7",
|
7 |
"metadata": {},
|
8 |
"outputs": [],
|
@@ -15,7 +15,29 @@
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
"id": "ae4e7f03",
|
20 |
"metadata": {},
|
21 |
"outputs": [
|
@@ -25,7 +47,7 @@
|
|
25 |
"True"
|
26 |
]
|
27 |
},
|
28 |
-
"execution_count":
|
29 |
"metadata": {},
|
30 |
"output_type": "execute_result"
|
31 |
}
|
@@ -37,19 +59,64 @@
|
|
37 |
},
|
38 |
{
|
39 |
"cell_type": "code",
|
40 |
-
"execution_count":
|
41 |
"id": "9e6b0eb9",
|
42 |
"metadata": {},
|
43 |
"outputs": [],
|
44 |
"source": [
|
45 |
"llm = NvidiaLLMClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'qwen/qwen2.5-7b-instruct'})\n",
|
46 |
-
"reranker = NvidiaRerankerClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name':
|
47 |
"# reranker = HFRerankerClient()"
|
48 |
]
|
49 |
},
|
50 |
{
|
51 |
"cell_type": "code",
|
52 |
-
"execution_count":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
53 |
"id": "3bc223d0",
|
54 |
"metadata": {},
|
55 |
"outputs": [],
|
@@ -67,23 +134,17 @@
|
|
67 |
},
|
68 |
{
|
69 |
"cell_type": "code",
|
70 |
-
"execution_count":
|
71 |
"id": "475fccd2",
|
72 |
"metadata": {},
|
73 |
"outputs": [],
|
74 |
"source": [
|
75 |
-
"classification_prompt_template =
|
76 |
-
"{\n",
|
77 |
-
" \"title\": {\"type\": \"string\", \"description\": \"Page title\"},\n",
|
78 |
-
" \"price\": {\"type\": \"number\", \"description\": \"Product price\"},\n",
|
79 |
-
" \"description\": {\"type\": \"string\", \"description\": \"Product description\"}\n",
|
80 |
-
"}\n",
|
81 |
-
"\"\"\""
|
82 |
]
|
83 |
},
|
84 |
{
|
85 |
"cell_type": "code",
|
86 |
-
"execution_count":
|
87 |
"id": "974417de",
|
88 |
"metadata": {},
|
89 |
"outputs": [],
|
@@ -141,7 +202,7 @@
|
|
141 |
},
|
142 |
{
|
143 |
"cell_type": "code",
|
144 |
-
"execution_count":
|
145 |
"id": "58436d65",
|
146 |
"metadata": {},
|
147 |
"outputs": [],
|
@@ -155,7 +216,7 @@
|
|
155 |
},
|
156 |
{
|
157 |
"cell_type": "code",
|
158 |
-
"execution_count":
|
159 |
"id": "c4e75e63",
|
160 |
"metadata": {},
|
161 |
"outputs": [],
|
@@ -213,7 +274,7 @@
|
|
213 |
},
|
214 |
{
|
215 |
"cell_type": "code",
|
216 |
-
"execution_count":
|
217 |
"id": "bb4edecf",
|
218 |
"metadata": {},
|
219 |
"outputs": [
|
@@ -223,7 +284,7 @@
|
|
223 |
"4"
|
224 |
]
|
225 |
},
|
226 |
-
"execution_count":
|
227 |
"metadata": {},
|
228 |
"output_type": "execute_result"
|
229 |
}
|
@@ -234,17 +295,29 @@
|
|
234 |
},
|
235 |
{
|
236 |
"cell_type": "code",
|
237 |
-
"execution_count":
|
238 |
"id": "9927a78e",
|
239 |
"metadata": {},
|
240 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
"source": [
|
242 |
"output = reranker.rerank(query=classification_prompt_template,passages=html_chunks)"
|
243 |
]
|
244 |
},
|
245 |
{
|
246 |
"cell_type": "code",
|
247 |
-
"execution_count":
|
248 |
"id": "b77015f3",
|
249 |
"metadata": {},
|
250 |
"outputs": [
|
@@ -252,53 +325,14 @@
|
|
252 |
"name": "stdout",
|
253 |
"output_type": "stream",
|
254 |
"text": [
|
255 |
-
"
|
256 |
" <div class=\"product-card\">\n",
|
257 |
" <h2 class=\"product-title\">Wireless Noise Cancelling Headphones</h2>\n",
|
258 |
" <p class=\"product-description\">Experience immersive sound with active noise cancellation and long battery life.</p>\n",
|
259 |
" <span class=\"price\">$299.99</span>\n",
|
260 |
" <button>Add to Cart</button>\n",
|
261 |
" </div>\n",
|
262 |
-
"
|
263 |
-
"--------------------------------------------------------------------------------\n",
|
264 |
-
"page_content='\n",
|
265 |
-
" <section class=\"blog-post\">\n",
|
266 |
-
" <h1>Top 5 AI Tools to Try in 2025</h1>\n",
|
267 |
-
" <p>Artificial intelligence continues to evolve. Here are five tools you should explore in 2025:</p>\n",
|
268 |
-
" <ul>\n",
|
269 |
-
" <li>LangChain</li>\n",
|
270 |
-
" <li>AutoGen</li>\n",
|
271 |
-
" <li>OpenDevin</li>\n",
|
272 |
-
" <li>FastRAG</li>\n",
|
273 |
-
" <li>GPTScript</li>\n",
|
274 |
-
" </ul>\n",
|
275 |
-
" <footer>Published by <strong>TechToday</strong> on June 30, 2025</footer>\n",
|
276 |
-
" </section>\n",
|
277 |
-
" ' metadata={'relevance_score': -15.2265625, 'softmax_score': 0.017900461577508887, 'minmax_score': 0.00888037271767236}\n",
|
278 |
-
"--------------------------------------------------------------------------------\n",
|
279 |
-
"page_content='\n",
|
280 |
-
" <section class=\"blog-post\">\n",
|
281 |
-
" <h1>Top 5 AI Tools to Try in 2025</h1>\n",
|
282 |
-
" <p>Artificial intelligence continues to evolve. Here are five tools you should explore in 2025:</p>\n",
|
283 |
-
" <ul>\n",
|
284 |
-
" <li>LangChain</li>\n",
|
285 |
-
" <li>AutoGen</li>\n",
|
286 |
-
" <li>OpenDevin</li>\n",
|
287 |
-
" <li>FastRAG</li>\n",
|
288 |
-
" <li>GPTScript</li>\n",
|
289 |
-
" </ul>\n",
|
290 |
-
" <footer>Published by <strong>TechToday</strong> on June 30, 2025</footer>\n",
|
291 |
-
" </section>\n",
|
292 |
-
" ' metadata={'relevance_score': -15.2265625, 'softmax_score': 0.017900461577508887, 'minmax_score': 0.00888037271767236}\n",
|
293 |
-
"--------------------------------------------------------------------------------\n",
|
294 |
-
"page_content='\n",
|
295 |
-
" <div class=\"review\">\n",
|
296 |
-
" <h3>User Review: Amazing Performance!</h3>\n",
|
297 |
-
" <p>I’ve been using this laptop for a few months and it’s blazing fast. Great for deep learning workloads!</p>\n",
|
298 |
-
" <div class=\"rating\">Rating: ⭐⭐⭐⭐⭐</div>\n",
|
299 |
-
" <span class=\"user\">– Sarah M.</span>\n",
|
300 |
-
" </div>\n",
|
301 |
-
" ' metadata={'relevance_score': -15.859375, 'softmax_score': 0.009506863381497203, 'minmax_score': 0.0}\n",
|
302 |
"--------------------------------------------------------------------------------\n"
|
303 |
]
|
304 |
}
|
@@ -311,17 +345,17 @@
|
|
311 |
},
|
312 |
{
|
313 |
"cell_type": "code",
|
314 |
-
"execution_count":
|
315 |
"id": "bb3fa1b0",
|
316 |
"metadata": {},
|
317 |
"outputs": [
|
318 |
{
|
319 |
"data": {
|
320 |
"text/plain": [
|
321 |
-
"
|
322 |
]
|
323 |
},
|
324 |
-
"execution_count":
|
325 |
"metadata": {},
|
326 |
"output_type": "execute_result"
|
327 |
}
|
@@ -332,7 +366,7 @@
|
|
332 |
},
|
333 |
{
|
334 |
"cell_type": "code",
|
335 |
-
"execution_count":
|
336 |
"id": "c1c43f7c",
|
337 |
"metadata": {},
|
338 |
"outputs": [],
|
@@ -342,7 +376,7 @@
|
|
342 |
},
|
343 |
{
|
344 |
"cell_type": "code",
|
345 |
-
"execution_count":
|
346 |
"id": "9c78eec9",
|
347 |
"metadata": {},
|
348 |
"outputs": [],
|
@@ -352,36 +386,7 @@
|
|
352 |
},
|
353 |
{
|
354 |
"cell_type": "code",
|
355 |
-
"execution_count":
|
356 |
-
"id": "0b324a01",
|
357 |
-
"metadata": {},
|
358 |
-
"outputs": [],
|
359 |
-
"source": [
|
360 |
-
"from pydantic import BaseModel, Field, constr, condecimal\n",
|
361 |
-
"\n",
|
362 |
-
"class ProductModel(BaseModel):\n",
|
363 |
-
" productTitle: constr(min_length=1, max_length=200) = Field(\n",
|
364 |
-
" ...,\n",
|
365 |
-
" title=\"Product Title\",\n",
|
366 |
-
" description=\"The full title of the product\"\n",
|
367 |
-
" )\n",
|
368 |
-
" price: condecimal(gt=0, decimal_places=2) = Field(\n",
|
369 |
-
" ...,\n",
|
370 |
-
" title=\"Product Price\",\n",
|
371 |
-
" description=\"Unit price (must be > 0, two decimal places).\"\n",
|
372 |
-
" )\n",
|
373 |
-
" manufacturer: constr(min_length=1, max_length=1000) = Field(\n",
|
374 |
-
" ...,\n",
|
375 |
-
" title=\"Manufacturer\",\n",
|
376 |
-
" description=\"Name of the product manufacturer.\"\n",
|
377 |
-
" )\n",
|
378 |
-
"\n",
|
379 |
-
" "
|
380 |
-
]
|
381 |
-
},
|
382 |
-
{
|
383 |
-
"cell_type": "code",
|
384 |
-
"execution_count": 11,
|
385 |
"id": "92a5fc23",
|
386 |
"metadata": {},
|
387 |
"outputs": [],
|
@@ -393,25 +398,7 @@
|
|
393 |
},
|
394 |
{
|
395 |
"cell_type": "code",
|
396 |
-
"execution_count":
|
397 |
-
"id": "d2cfb033",
|
398 |
-
"metadata": {},
|
399 |
-
"outputs": [],
|
400 |
-
"source": [
|
401 |
-
"url = \"https://www.amazon.com/Instant-Pot-Multi-Use-Programmable-Pressure/dp/B00FLYWNYQ?_encoding=UTF8&content-id=amzn1.sym.2f889ce0-246f-467a-a086-d9a721167240&dib=eyJ2IjoiMSJ9.2EzBddTDEktLY8ckTsraM_cZ6pzKuNkA6y_gLR0-Uz1ekttQU6tuQEcjb8PThy0PfhvxLqeYWh3N7pQrGgRxAWzapVklC_aU6xBzD-3Wwqx3qyQRHsmOhPRsSpeCOIIZqS3SKDowZEPYrGnCbRMt5vxnsYMW-fD-zBbgeoeGYmbsN2U6_HNhLjrpePKCbQPmnZBJ9UhgYE4fE3DVuYm8xlJe9l5GixDLVFtZUq4m5FE.Ol-jiuu9P6mQie0yXLJj-Ht5-TXmIXuRPije85p_YVo&dib_tag=se&keywords=cooker&pd_rd_r=2cede598-f3ae-49ca-8a46-e5945a9c2631&pd_rd_w=2HLSC&pd_rd_wg=ZyUUn&qid=1749508157&sr=8-3\"\n",
|
402 |
-
"schema = ProductModel # pydantic class\n",
|
403 |
-
"\n",
|
404 |
-
"# read html file \n",
|
405 |
-
"# with open(r'C:\\Users\\abdfa\\Desktop\\UNI STUFFING\\GRADUATION PROJECT\\Group Work\\MCP_WEB2JSON\\0000.htm', 'r', encoding='utf-8') as file:\n",
|
406 |
-
"# content = file.read()\n",
|
407 |
-
"\n",
|
408 |
-
"# with open(r'C:\\Users\\abdfa\\Desktop\\UNI STUFFING\\GRADUATION PROJECT\\Group Work\\MCP_WEB2JSON\\Amazon.com_ Instant Pot Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer & Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart.htm', 'r', encoding='utf-8') as file:\n",
|
409 |
-
"# content = file.read()\n"
|
410 |
-
]
|
411 |
-
},
|
412 |
-
{
|
413 |
-
"cell_type": "code",
|
414 |
-
"execution_count": 16,
|
415 |
"id": "f07e1aca",
|
416 |
"metadata": {},
|
417 |
"outputs": [],
|
@@ -439,7 +426,7 @@
|
|
439 |
},
|
440 |
{
|
441 |
"cell_type": "code",
|
442 |
-
"execution_count":
|
443 |
"id": "79cf2321",
|
444 |
"metadata": {},
|
445 |
"outputs": [
|
@@ -448,11 +435,164 @@
|
|
448 |
"output_type": "stream",
|
449 |
"text": [
|
450 |
"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
|
451 |
-
"
|
452 |
-
"
|
453 |
-
"Content successfully chunked: [\"<html><head>\\n<link/>\\n<link/>\\n<link/>\\n<meta/><title>Amazon.com: Instant Pot Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer & Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart</title>\\n</head><body><div><nav>\\n<h2>Skip to</h2>\\n<ul>\\n<li>\\n Main content\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n Buying options\\n</li>\\n<li>\\n Compare with similar items\\n</li>\\n<li>\\n Videos\\n</li>\\n<li>\\n Reviews\\n</li>\\n</ul>\\n<h2>\\n Keyboard shortcuts\\n </h2>\\n<ul>\\n<li>\\nSearch\\nalt\\n+\\n/\\n</li>\\n<li>\\nCart\\nshift\\n+\\nalt\\n+\\nC\\n</li>\\n<li>\\nHome\\nshift\\n+\\nalt\\n+\\nH\\n</li>\\n<li>\\nOrders\\nshift\\n+\\nalt\\n+\\nO\\n</li>\\n<li>\\n<button>\\n<div>\\n<span>Add to cart</span>\\n<div>\\n<span>shift</span>\\n<span>+</span>\\n<span>alt</span>\\n<span>+</span>\\n<span>K</span>\\n</div>\\n</div>\\n</button>\\n</li>\\n<li>\\n<button>\\n<div>\\n<span>Show/Hide shortcuts</span>\\n<div>\\n<span>shift</span>\\n<span>+</span>\\n<span>alt</span>\\n<span>+</span>\\n<span>Z</span>\\n</div>\\n</div>\\n</button>\\n</li>\\n</ul>\\n<div>\\n<div>\\n<div>\\n<div>\\n<span>To move between items, use your keyboard's up or down arrows.</span>\\n</div>\\n</div>\\n</div>\\n</div>\\n</nav></div></body><body><div><div><div><div>\\n<div>\\n<div><div><div><ul><li><span>Home & Kitchen</span></li><li><span>›</span></li><li><span>Kitchen & Dining</span></li><li><span>›</span></li><li><span>Small Appliances</span></li><li><span>›</span></li><li><span>Rice Cookers</span></li></ul></div></div></div> </div>\\n</div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div> <div> <h5> <div>\\n<div> <span> Deal Price Regular Price </span> </div> </div>\\n</h5> <div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div> 
<div>\\n<div>\\n<span><span>$79.99</span><span><span>$</span><span>79<span>.</span></span><span>99</span></span></span> </div>\\n</div> </div> </div>\\n</div>\\n</div>\\n</div>\\n<div>\\n<div> <div> <div> <span> Ships from: </span> <span> Amazon.com </span> </div> </div> <div> <div> <span> Sold by: </span> <span> Amazon.com </span> </div> </div> </div> </div>\\n</div>\\n<div><form><input/></form></div><div><form><div><div><div>\\n<div>\\n<span> $235.34 Shipping & Import Fees Deposit to Egypt </span> <span> Details </span> <div> <h3>Shipping & Fee Details</h3>\\n<table> <tr> <td> <span> Price </span> </td> <td> <span> $99.95 </span> </td> </tr> <tr> <td> <span> AmazonGlobal Shipping </span> </td> <td> <span> $81.05 </span> </td> </tr> <tr> <td> <span> \\n Estimated Import Fees Deposit\\n</span> </td> <td> <span> $154.29 </span> </td> </tr> <tr> <td> <span>Total</span> </td> <td> <span> $335.29 </span> </td> </tr> </table> </div>\\n</div>\\n<div>\\n<div>\\n<div><div><div><span> Delivery <span>Sunday, July 13</span>. Order within <span>23 hrs 59 mins</span> </span></div></div></div> </div>\\n<div>\\n<span> \\nDeliver to\\xa0Egypt\\n </span> </div>\\n</div>\\n</div></div></div></form></div><div><form><div><div><div>\\n<div> <div> <span> In Stock </span> </div> </div> </div></div></div></form></div></div> <div>\\n<div>\\n<div> <div> <span> This deal is exclusively for Amazon Prime members. </span> </div>\\n<div> <div>\\n<span><span><input/><span> Join Prime </span></span></span> </div>\\n<div> <span>Cancel anytime</span> </div> </div> <div> <span> Already a member? 
</span> Sign in </div> </div> </div>\\n<div>\\n<div> <div> <div> <div>\\n<div>\\n<div>\\n<div>\\n<span>Ships from</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Ships from</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Sold by</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Sold by</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Returns</span> </div>\\n</div>\\n<div>\\n<span> 30-day refund/replacement </span> <div> <div> <div> <span>30-day refund/replacement</span> </div> <div> This item can be returned in its original condition for a full refund or replacement within 30 days of receipt. </div> <div> Read full return policy </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Payment</span> </div>\\n</div>\\n<div>\\n<span> Secure transaction </span> <div> <div> <div> <span>Your transaction is secure</span> </div> <div> We work hard to protect your security and privacy. Our payment security system encrypts your information during transmission. We don’t share your credit card details with third-party sellers, and we don’t sell your information to others. Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Support</span> </div>\\n</div>\\n<div>\\n<span> Product support included </span> <div> <div> <div> <span>What\\'s Product Support?</span> </div> <div> In the event your product doesn\\'t work as expected or you need help using it, Amazon offers free product support options such as live phone/chat with an Amazon associate, manufacturer contact information, step-by-step troubleshooting guides, and help videos. \\nBy solving product issues, we help the planet by extending the life of products. Availability of support options differ by product and country. 
Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Packaging</span> </div>\\n</div>\\n<div>\\n<span> Ships in product packaging </span> <div> <div> <div> <span>Ships in product packaging</span> </div> <div> <p>This item has been tested to certify it can ship safely in its original box or bag to avoid unnecessary packaging. Since 2015, we have reduced the weight of outbound packaging per shipment by 41% on average, that’s over 2 million tons of packaging material.</p><i>If you still require Amazon packaging for this item, choose \"Ship in Amazon packaging\" at checkout. </i> Learn more </div> </div> </div> </div> </div>\\n</div>\\n</div> <div> <div>See more</div> </div> </div> </div> </div>\\n</div> </div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\\n<div>\\n<div> <span> <div> <div>\\n<span><label>Quantity:</label><select> <option>1 </option> <option>2 </option> <option>3 </option> <option>4 </option> <option>5 </option> <option>6 </option> <option>7 </option> <option>8 </option> <option>9 </option> <option>10 </option> <option>11 </option> <option>12 </option> <option>13 </option> <option>14 </option> <option>15 </option> <option>16 </option> <option>17 </option> <option>18 </option> <option>19 </option> <option>20 </option> <option>21 </option> <option>22 </option> <option>23 </option> <option>24 </option> <option>25 </option> <option>26 </option> <option>27 </option> <option>28 </option> <option>29 </option> <option>30 </option> </select><span><span><span><span>Quantity:</span><span>1</span></span></span></span></span> </div> </div> <span><input/><span> Buy Now </span></span></span> <div><div> <span> Enhancements you chose aren't available for this seller. 
</span> <span> Details </span> <div> <div> <div> <div> <span> To add the following enhancements to your purchase, choose a different seller. </span> </div> <div> <span>%cardName%</span> </div> </div> </div> </div> </div></div></div> <span> <span><span><input/><span>Add to Cart</span></span></span> </span></div> <input/><div> <div> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> <span> \\n ()\\n </span> <span> Includes selected options. </span> <span> Includes initial monthly payment and selected options. </span> <span> <span> <span>\\n Details </span>\\n</span> <div> <div> <div> <div> <div><div> <div> <span>Price</span> <span> <span> <span> (</span><span>$</span><span>79<span>.</span></span><span>99</span><span>x)</span> </span> </span> </div> <div> <span> <span> <span>$</span><span>79<span>.</span></span><span>99</span> </span> </span> </div> </div></div> </div> <div> <div><div> <div> <span>Subtotal</span> </div> <div> <span> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> </span> </div> </div></div> <div><div> <div> <span>Subtotal</span> </div> </div></div> <div> <div> <span>Initial payment breakdown</span> </div> </div> <div> <span>Shipping cost, delivery date, and order total (including tax) shown at checkout.</span>\\n</div> </div> </div> </div> </div> </span> </div> </div></div></div></div></form></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\\n<div> <div> <div> <div>\\n<div>\\n<div>\\n<div>\\n<span>Ships from</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Ships from</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Sold 
by</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Sold by</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Returns</span> </div>\\n</div>\\n<div>\\n<span> 30-day refund/replacement </span> <div> <div> <div> <span>30-day refund/replacement</span> </div> <div> This item can be returned in its original condition for a full refund or replacement within 30 days of receipt. </div> <div> Read full return policy </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<span>Packaging</span> </div>\\n</div>\\n<div>\\n<span> Ships in product packaging </span> <div> <div> <div> <span>Ships in product packaging</span> </div> <div> <p>This item has been tested to certify it can ship safely in its original box or bag to avoid unnecessary packaging. Since 2015, we have reduced the weight of outbound packaging per shipment by 41% on average, that’s over 2 million tons of packaging material.</p><i>If you still require Amazon packaging for this item, choose \"Ship in Amazon packaging\" at checkout. </i> Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Payment</span> </div>\\n</div>\\n<div>\\n<span> Secure transaction </span> <div> <div> <div> <span>Your transaction is secure</span> </div> <div> We work hard to protect your security and privacy. Our payment security system encrypts your information during transmission. We don’t share your credit card details with third-party sellers, and we don’t sell your information to others. 
Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Support</span> </div>\\n</div>\\n<div>\\n<span> Product support included </span> <div> <div> <div> <span>What\\'s Product Support?</span> </div> <div> In the event your product doesn\\'t work as expected or you need help using it, Amazon offers free product support options such as live phone/chat with an Amazon associate, manufacturer contact information, step-by-step troubleshooting guides, and help videos. \\nBy solving product issues, we help the planet by extending the life of products. Availability of support options differ by product and country. Learn more </div> </div> </div> </div> </div>\\n</div>\\n</div>\\n</div>\\n</div> <div> <div>See more</div> </div> </div> <label><input/><span>Add a gift receipt for easy returns</span></label></div> </div></div></div></form></div><span>Instant Pot Duo</span></div></div></div>\\n</div>\\n</div></div> \\xa0 Report an issue with this product or seller<h4>Product voltage: 120</h4></div> <span> <span>8K+ bought</span><span> in past month</span> </span>Brief content visible, double tap to read full content.</div> Visit the Instant Pot Store \\n <ul> <li><span> 7-IN-1 FUNCTIONALITY: Pressure cook, slow cook, rice cooker, yogurt maker, steamer, sauté pan and food warmer. </span></li> <li><span> QUICK ONE-TOUCH COOKING: 13 customizable Smart Programs for pressure cooking ribs, soups, beans, rice, poultry, yogurt, desserts and more. </span></li> <li><span> COOK FAST OR SLOW: Pressure cook delicious one-pot meals up to 70% faster than traditional cooking methods or slow cook your favorite traditional recipes – just like grandma used to make. </span></li> <li><span> QUICK AND EASY CLEAN UP: Finger-print resistant, stainless-steel sides and dishwasher-safe lid, inner pot, and accessories. 
</span></li> <li><span> SAFETY FEATURES: Includes over 10 safety features, plus overheat protection and safe-locking lid </span></li> <li><span> GREAT FOR GROWING FAMILIES: Cook for up to 6 people – perfect for growing families, or meal prepping and batch cooking for singles. </span></li> <li><span> VERSATILE INNER COOKING POT: We use food-grade stainless-steel, a tri-ply bottom for more even cooking and perfect for sautéing </span></li> <li><span> DISCOVER AMAZING RECIPES: Includes the free Instant Brands Connect App, where you can find new recipes to create quick favorites and prepare delicious meals, available for iOS and Android. </span></li> </ul><div> <span>›</span> See more product details </div> <div><div> <div> Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart <span>$94.95</span> (2,374) <span>In Stock</span> </div> </div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div>\\n<div>\\n<div><div> <div><input/> <div><div><h2>Deals on related products</h2> <div> <div> <span> Sponsored </span> </div> </div> </div><div><span><span>Page <span>1</span> of <span>1</span></span><span>Start over</span></span></div></div> <div> <div><div><div>Previous page of related Sponsored Products</div><div><div><ol> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CHEF iQ Smart Pressure Cooker with WiFi and Built-in Scale - Easy-to-Use 10-in-1 Mu... </div> <div> 2,645 </div> With Prime <div> -30%$139.98$139.98List Price:$199.99$199.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Nuwave Duet Air Fryer, Electric Pressure Cooker & Grill Combo, 540 IN 1 Multicooker... 
</div> <div> 378 </div> With Prime <div> -10%$146.10$146.10Typical price:$162.33$162.33 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Hamilton Beach 3-in-1 Electric Egg Cooker for Hard Boiled Eggs, Poacher Eggs, Omele... </div> <div> 5,210 </div> <div> Amazon's\\xa0Choice </div> Ends in <div> -19%$16.98$16.98List:$20.95$20.95 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CUCKOO CRP-ST1009FW 10-Cup (Uncooked) / 20-Cup (Cooked) Twin Pressure Rice Cooker &... </div> <div> 366 </div> <div> Amazon's\\xa0Choice </div> With Prime <div> -31%$239.99$239.99List Price:$349.99$349.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> WantJoin Pressure Cooker, 8 Quart Stainless Steel Pressure Canner, Induction Compat... </div> <div> 947 </div> <div> Amazon's\\xa0Choice </div> Limited time deal <div> -10%$80.89$80.89List:$89.99$89.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Buffalo Classic Rice Cooker with Clad Stainless Steel Inner Pot - Electric Rice Coo... </div> <div> 479 </div> Ends in <div> -15%$126.65$126.65List Price$149.00$149.00 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Pizza Oven Indoor, Countertop Electric Pizza Maker 12-inch, 2-minute Pizza, 6 Prese... 
</div> <div> 12 </div> Limited time deal <div> -15%$169.99$169.99List:$199.99$199.99 </div> </div> </li> </ol></div></div><div>Next page of related Sponsored Products</div></div></div> </div> <span>\\n<div>\\n<div><div><div><div><h2>Customer reviews</h2></div><div><div><div><i><span>4.6 out of 5 stars</span></i></div><div><div><span><span>4.6 out of 5</span></span></div></div></div></div><div><span>130,204 global ratings</span></div><div><div><div><ul><li><span>5 star4 star3 star2 star1 star5 star83%10%3%1%3%83%</span></li><li><span>5 star4 star3 star2 star1 star4 star83%10%3%1%3%10%</span></li><li><span>5 star4 star3 star2 star1 star3 star83%10%3%1%3%3%</span></li><li><span>5 star4 star3 star2 star1 star2 star83%10%3%1%3%1%</span></li><li><span>5 star4 star3 star2 star1 star1 star83%10%3%1%3%3%</span></li></ul></div></div></div><div><div><div><div>How customer reviews and ratings work<div><p>Customer Reviews, including Product Star Ratings help customers to learn more about the product and decide whether it is the right product for them.</p><p>To calculate the overall star rating and percentage breakdown by star, we don’t use a simple average. Instead, our system considers things like how recent a review is and if the reviewer bought the item on Amazon. 
It also analyzed reviews to verify trustworthiness.</p>Learn more how customers reviews work on Amazon</div></div></div></div></div></div></div></div>\\n</div></span><div><h3>Review this product</h3><div>Share your thoughts with other customers</div><div><span><span>Write a customer review</span></span></div></div></div> </div></div>\\n</div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><h3>Customers say</h3><div><div><div><div><h4>Select to learn more</h4></div><div>Works wellEase of useCook timeAppliance qualityCooking abilityEase of cleaningFlavorBuild quality</div></div></div></div><div><div><div><div><div><div><div><span>8,040 customers mention \"Works well\"</span><span>6,940 positive</span><span>1,100 negative</span></div></div></div><div><div><p>Customers find that the pressure cooker works well, with the sauté feature performing particularly effectively.</p></div></div><div><div><p>\"...This <b>works with new potatoes</b>, and regular potatoes! Happy Instant Potting!\" Read more</p></div></div><div><div><p>\"...<b>It was excellent</b>. I did 6 minutes per pound + 2 minutes. 
I also cook chicken thighs for dinner about once a week, which I had never cooked before....\" Read more</p></div></div><div><div><p>\"...Most <b>programs work just fine on full automatic</b>, but some small exceptions may demand more online flexibility....\" Read more</p></div></div><div><div><p>\"...occasional mishaps, the Instant Pot Duo has consistently <b>delivered incredible results</b>....\" Read more</p></div></div></div></div></div></div><div><div><div><div><div><div><div><span>7,651 customers mention \"Ease of use\"</span><span>6,651 positive</span><span>1,000 negative</span></div></div></div><div><div><p>Customers find the pressure cooker simple to use, with clear operating instructions in the booklet, making meal preparation a breeze.</p></div></div><div><div><p>\"...make in your Instant Pot that will change your life: <b>incredibly easy perfectly poached eggs</b> in 2-3 minutes, and baked potatoes in 12 minutes....\" Read more</p></div></div><div><div><p>\"...credit as most automatic settings work well, automating it for <b>ease of use</b> and safety. 
Cooking is part Science, but, I think, more Art than Science....\" Read more</p></div></div><div><div><p>\"...crockpot extensively over the past years and while I appreciate the <b>ease of use</b> and the ability to put a meal on the table soon after I got home in...\" Read more</p></div></div><div><div><p>\"...of pressure cookers anymore, the time , energy bills saved n <b>convenience is worth it</b>!...\" Read more</p></div></div></div></div></div></div><div><div><div><div><div><div><div><span>6,666 customers mention \"Cook time\"</span><span>6,260 positive</span><span>406 negative</span></div></div></div><div><div><p>Customers appreciate the pressure cooker\\'s quick cooking time, with one mentioning it can make rice in just 10 minutes, while another notes it cooks like a crockpot in 1/8th the time.</p></div></div><div><div><p>\"...incredibly easy perfectly poached eggs in 2-3 minutes, and <b>baked potatoes in 12 minutes</b>....\" Read more</p></div></div><div><div><p>\"...My kids love it. <b>8 minutes on manual with a natural release</b>. I just stir it with a fork and don\\'t even need to blend it....\" Read more</p></div></div><div><div><p>\"...steel liner (looks like chrome), along with the <b>delay and cooking timer auto-shutoff</b>. 
This sets it apart from old-time swisher type 1st Gen P.C.\\'s....\" Read more</p></div></div><div><div><p>\"...versatile appliance seamlessly transforms into a pressure cooker, <b>slow cooker</b>, rice cooker, steamer, sauté pan, yogurt maker, warmer, and even a...\" Read more</p></div></div></div></div></div></div><div><div><div><div><div><div><div><span>5,399 customers mention \"Appliance quality\"</span><span>5,399 positive</span><span>0 negative</span></div></div></div><div><div><p>Customers find the pressure cooker to be a fabulous kitchen appliance, with one customer noting its versatility as both a pressure cooker and crockpot.</p></div></div><div><div><p>\"...When you are ready for your potatoes, they will be <b>perfectly done</b> and waiting for you, even if you have abandoned them for hours!...\" Read more</p></div></div><div><div><p>\"...I have to use a rapid boil just to make tea. A <b>pressure cooker is the great equalizer</b>, a must at higher altitudes because 15 lbs is 15 lbs pressure...\" Read more</p></div></div><div><div><p>\"...This <b>versatile appliance seamlessly transforms into a pressure cooker</b>, slow cooker, rice cooker, steamer, sauté pan, yogurt maker, warmer, and even...\" Read more</p></div></div><div><div><p>\"...It\\'s just better insulated, but I\\'ve found that <b>meals are so good under pressure</b> that there\\'s no need to use the slow cooker function....\" Read more</p></div></div></div></div></div></div></div><p><span>Customers find the pressure cooker works well, particularly praising its sauté feature and accurate cooking times. They appreciate its ease of use, with one customer noting the intuitive controls, and consider it a great kitchen appliance that makes meal prep convenient. The appliance receives positive feedback for its cooking ability, with one customer highlighting its versatility in transforming into a pressure cooker, and customers find it easy to clean with a stainless steel pot that cleans well. 
Customers enjoy the complex flavors produced, though opinions on build quality are mixed, with some finding it well-made while others describe it as wimpy.</span></p><p><span>AI Generated from the text of customer reviews</span></p></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>', '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><span>4,396 customers mention \"Cooking ability\"3,065 customers mention \"Ease of cleaning\"2,827 customers mention \"Flavor\"2,273 customers mention \"Build quality\"</span><span>4,298 positive</span><span>98 negative</span>Amazon Customer<i><span>5 out of 5 stars</span></i>MoreHide</div><h5>This has changed the way we eat. It\\'s easier to use than I thought it would be.</h5></div></div><div><div><p>Customers praise the pressure cooker\\'s cooking ability, particularly its amazing recipes and rice cooking feature, with one customer noting it makes stir-fry dishes and another mentioning it\\'s easy to use on the dining room table.</p></div></div><div><div><p>\"...there in the morning, leave for the day, and come back to a <b>perfectly cooked whatever</b>, just waiting for you! 
Booyah!...\" Read more</p></div></div><div><div><p>\"...You could <b>very easily cook on the dining room table</b>, or a small adjacent table....\" Read more</p></div></div><div><div><p>\"...While the <b>free app provided great recipes</b> and guidance, a comprehensive manual would have been helpful for understanding all the features and...\" Read more</p></div></div><div><div><p>\"...This handy appliance has <b>transformed my summertime cooking</b>, allowing me to break away from our usual salads and grilled chicken rut....\" Read more</p></div></div></div>Sorry we couldn\\'t load the review</div><span><div><div><div>Thank you for your feedback</div><button>Close</button></div></div></span><span><div><div><div>Sorry, there was an error</div><button>Close</button></div></div></span></div><button><span>All photos</span></button></div></div><input/><div><div><div>Previous page</div><div>Next page</div></div></div></div>\\n View Image Gallery\\n</div></div></div><span><div><div><div><h3>\\n Top reviews from the United States\\n</h3></div></div></div></span><span><div><div><div><div><div><div><h4>There was a problem filtering reviews. Please reload the page.</h4></div></div></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div>Anne P. Mitchell</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><h5>5.0 out of 5 stars\\nI LOVE My Instant Pot! 
But Here\\'s What I Wish I\\'d Known when I First Got It\\n</h5></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><span>Reviewed in the United States on April 16, 2016</span></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span>Size: 6 Quarts</span>Verified Purchase</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span><div><div><span><br/></span></div></div></span></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span><div><div>Read more</div></div></span></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span>\\n<div><span>30,253 people found this helpful</span></div>\\n<div>\\n<span><span>\\n Helpful\\n</span></span></div>\\n</span><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div>Aundrea</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><h5>5.0 out of 5 stars\\nThis has changed the way we eat. It\\'s easier to use than I thought it would be.\\n</h5></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><span>Reviewed in the United States on August 18, 2016</span></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><div><div><div>\\n<div>\\nAundrea\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n This has changed the way we eat. 
It\\'s easier to use than I thought it would be.\\n </h5>\\n<span>\\n Reviewed in the United States on August 18, 2016\\n </span>\\n</div></div></div></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><div><div><span><br/></span></div></div></div></div></div></span></li></ul></div></div></div></div></span></div></div></div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div><div><div><div><div><div><div><div><span><div><div><div><div><ul><li><span><div><div><div><div><div><div>\\n<h6>\\n Images in this review\\n </h6>\\n</div></div></div><span>\\n<div><span>5,558 people found this helpful</span></div>\\n<div>\\n<span><span>\\n Helpful\\n</span></span></div>\\n</span>\\n</div></div></div></span></li></ul>See more reviews</div></div></div></div><span><div>\\n<h3>\\n Top reviews from other countries\\n </h3>\\n<div>\\n<div><span><span><span>Translate all reviews to English</span></span></span>\\n</div>\\n</div>\\n</div></span><span><div><div><ul><li><span>\\n<div><div>\\n<div><div><div><span>Alheny</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>Excelente</span>\\n</span></h5></div><span>Reviewed in Mexico on June 4, 2025</span><div><span>Size: 6 Quarts</span>Verified Purchase</div><div><span>\\n<div><div>\\n<span>Excelente producto, la recomiendo totalmente, facilita el trabajo en la cocina</span>\\n</div><div>Read more</div></div></span></div><div><span>\\n<span><span>Report</span></span></span>\\n<div><span>Translate review to English</span></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><span>NeuroEmergent</span></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>A truly Canadian innovation - Instant Pot is the best item in my 
kitchen, hands down</span>\\n</span></h5></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><span>Reviewed in Canada on November 23, 2017</span></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span>Size: 6 Quarts</span>Verified Purchase</div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span><div><div>Read more</div></div></span></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><div>\\n<div>\\n<div><div><span>NeuroEmergent</span></div></div>\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n A truly Canadian innovation - Instant Pot is the best item in my kitchen, hands down\\n </h5>\\n<span>\\n Reviewed in Canada on November 23, 2017\\n </span>\\n</div></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><span><br/></span></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><div>\\n<h6>\\n Images in this review\\n </h6>\\n</div></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div>\\n<div><div><div><span>MV</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>3 Qt Instant Pot. LOVE IT!!!</span>\\n</span></h5></div><span>Reviewed in Canada on December 25, 2024</span><div><span>Size: 3 Quarts</span>Verified Purchase</div><div><span><div><div>\\n<span>My main cooking appliance. Uses only 675 watts max to build pressure, then mostly 0 watts under pressure but occasionally spiking back to 675 watts to keep the pressure. 
3 Qts is a great size for 1 or 2 people, or even more depending on what you are cooking. Takes some practice, reading the manual and recipe guide and trial and error to tweak preferred times. It will even boil a pot of water like a kettle, which I tried as a test but forgot to time it. Fantastic appliance for off grid energy efficiency and used far more than the induction hot plate. So far nothing it hasn't cooked. Also extremely safe with the On Off and delay timers and turns off if it were to boil dry, unlike a gas stove which could burn down your house. Can't tell you how I know that. Fantastic for seniors for safety if they can get over all the preset buttons which are not needed anyway and just learn to use the few buttons and functions required to cook almost anything. Highly Recommended.</span>\\n</div><div>Read more</div></div></span></div><div><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></span></span></div></div></div></div></div></div></div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><span><span><div><div><ul><li><span>\\n<div><div>\\n<div><div><div><span>Laissan sayab perez</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>Gran inversión para la cocina</span>\\n</span></h5></div><span>Reviewed in Mexico on March 30, 2025</span><div><span>Size: 3 Quarts</span>Verified Purchase</div><div><span><div><div>\\n<span>Gran inversión para la cocina, soy una persona muy ocupada y me gusta cuidar de mi salud me cocino, pero en los guisos y cocciones de frijoles se consume mucho gas , opté por esta olla que vi, ya hice mi primer caldo de res con verduras quedó la carne muy suave en poco tiempo ⏱️ me encantó, tiene muy buena seguridad para la presión.Lo que me encanta:✔️ Cocina mucho más rápido que una olla convencional.✔️ Tiene varias funciones, desde cocción a presión hasta salteado.✔️ Es segura y fácil de 
limpiar.Lo que podría mejorar:🔹 La curva de aprendizaje puede ser un poco alta al inicio, pero una vez que entiendes los tiempos y funciones, todo es sencillo.En general, es una excelente compra si quieres ahorrar tiempo en la cocina y hacer recetas deliciosas sin complicaciones. ¡La recomiendo totalmente!</span>\\n</div><div>Read more</div></div></span></div><div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div><div><span>Laissan </span></div></div>\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n Gran inversión para la cocina\\n </h5>\\n<span>\\n Reviewed in Mexico on March 30, 2025\\n </span>\\n</div>\\n<span>\\n Gran inversión para la cocina, soy una persona muy ocupada y me gusta cuidar de mi salud me cocino, pero en los guisos y cocciones de frijoles se consume mucho gas , opté por esta olla que vi, ya hice mi primer caldo de res con verduras quedó la carne muy suave en poco tiempo ⏱️ me encantó, tiene muy buena seguridad para la presión.<br/>Lo que me encanta:✔️ Cocina mucho más rápido que una olla convencional.✔️ Tiene varias funciones, desde cocción a presión hasta salteado.✔️ Es segura y fácil de limpiar.Lo que podría mejorar:🔹 La curva de aprendizaje puede ser un poco alta al inicio, pero una vez que entiendes los tiempos y funciones, todo es sencillo.En general, es una excelente compra si quieres ahorrar tiempo en la cocina y hacer recetas deliciosas sin complicaciones. 
¡La recomiendo totalmente!\\n</span>\\n<div>\\n<h6>\\n Images in this review\\n </h6>\\n</div>\\n</div>\\n</div>\\n</div>\\n<div><span>\\n<span><span>Report</span></span></span>\\n<div><span>Translate review to English</span></div></div></div></div></span></li><div>See more reviews</div></ul></div></div></span></span></div></div></div></div></div></div>Get to Know Us</div></div></div> <div> <div> <div> Your recently viewed items and featured recommendations </div> <div> <div> <div> <div> › </div> <div> View or edit your browsing history </div> </div> <span> After viewing product detail pages, look here to find an easy way to navigate back to pages you are interested in. </span> </div> </div> </div> </div></div></div></body></html>']\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
454 |
"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
|
455 |
-
"Final output: {'productTitle': 'Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart', 'price':
|
456 |
"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
|
457 |
]
|
458 |
},
|
@@ -460,23 +600,31 @@
|
|
460 |
"data": {
|
461 |
"text/plain": [
|
462 |
"{'productTitle': 'Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart',\n",
|
463 |
-
" 'price':
|
464 |
-
" 'manufacturer': '
|
465 |
]
|
466 |
},
|
467 |
-
"execution_count":
|
468 |
"metadata": {},
|
469 |
"output_type": "execute_result"
|
470 |
}
|
471 |
],
|
472 |
"source": [
|
473 |
-
"pipe.run(content=url,is_url=True, schema=schema, hf=
|
474 |
]
|
475 |
},
|
476 |
{
|
477 |
"cell_type": "code",
|
478 |
"execution_count": null,
|
479 |
-
"id": "
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
480 |
"metadata": {},
|
481 |
"outputs": [],
|
482 |
"source": []
|
@@ -498,7 +646,7 @@
|
|
498 |
"name": "python",
|
499 |
"nbconvert_exporter": "python",
|
500 |
"pygments_lexer": "ipython3",
|
501 |
-
"version": "3.11.
|
502 |
}
|
503 |
},
|
504 |
"nbformat": 4,
|
|
|
2 |
"cells": [
|
3 |
{
|
4 |
"cell_type": "code",
|
5 |
+
"execution_count": 101,
|
6 |
"id": "5223b1b7",
|
7 |
"metadata": {},
|
8 |
"outputs": [],
|
|
|
15 |
},
|
16 |
{
|
17 |
"cell_type": "code",
|
18 |
+
"execution_count": 102,
|
19 |
+
"id": "5ccc96b7",
|
20 |
+
"metadata": {},
|
21 |
+
"outputs": [],
|
22 |
+
"source": [
|
23 |
+
"def seed_everything(seed=42):\n",
|
24 |
+
" random.seed(seed)\n",
|
25 |
+
" np.random.seed(seed)\n",
|
26 |
+
" torch.manual_seed(seed)\n",
|
27 |
+
"\n",
|
28 |
+
" if torch.cuda.is_available():\n",
|
29 |
+
" torch.cuda.manual_seed(seed)\n",
|
30 |
+
" torch.cuda.manual_seed_all(seed) # if using multi-GPU\n",
|
31 |
+
"\n",
|
32 |
+
" torch.backends.cudnn.deterministic = True\n",
|
33 |
+
" torch.backends.cudnn.benchmark = False\n",
|
34 |
+
"\n",
|
35 |
+
"seed_everything(44)"
|
36 |
+
]
|
37 |
+
},
|
38 |
+
{
|
39 |
+
"cell_type": "code",
|
40 |
+
"execution_count": 103,
|
41 |
"id": "ae4e7f03",
|
42 |
"metadata": {},
|
43 |
"outputs": [
|
|
|
47 |
"True"
|
48 |
]
|
49 |
},
|
50 |
+
"execution_count": 103,
|
51 |
"metadata": {},
|
52 |
"output_type": "execute_result"
|
53 |
}
|
|
|
59 |
},
|
60 |
{
|
61 |
"cell_type": "code",
|
62 |
+
"execution_count": 104,
|
63 |
"id": "9e6b0eb9",
|
64 |
"metadata": {},
|
65 |
"outputs": [],
|
66 |
"source": [
|
67 |
"llm = NvidiaLLMClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'qwen/qwen2.5-7b-instruct'})\n",
|
68 |
+
"reranker = NvidiaRerankerClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': \"nv-rerank-qa-mistral-4b:1\"})\n",
|
69 |
"# reranker = HFRerankerClient()"
|
70 |
]
|
71 |
},
|
72 |
{
|
73 |
"cell_type": "code",
|
74 |
+
"execution_count": 105,
|
75 |
+
"id": "114ce917",
|
76 |
+
"metadata": {},
|
77 |
+
"outputs": [],
|
78 |
+
"source": [
|
79 |
+
"from pydantic import BaseModel, Field, constr, condecimal\n",
|
80 |
+
"\n",
|
81 |
+
"class ProductModel(BaseModel):\n",
|
82 |
+
" productTitle: constr(min_length=1, max_length=200) = Field(\n",
|
83 |
+
" ...,\n",
|
84 |
+
" title=\"Product Title\",\n",
|
85 |
+
" description=\"The full title of the product\"\n",
|
86 |
+
" )\n",
|
87 |
+
" price: condecimal(gt=0, decimal_places=2) = Field(\n",
|
88 |
+
" ...,\n",
|
89 |
+
" title=\"Product Price\",\n",
|
90 |
+
" description=\"Unit price (must be > 0, two decimal places).\"\n",
|
91 |
+
" )\n",
|
92 |
+
" manufacturer: constr(min_length=1, max_length=1000) = Field(\n",
|
93 |
+
" ...,\n",
|
94 |
+
" title=\"Manufacturer\",\n",
|
95 |
+
" description=\"Name of the product manufacturer.\"\n",
|
96 |
+
" )\n"
|
97 |
+
]
|
98 |
+
},
|
99 |
+
{
|
100 |
+
"cell_type": "code",
|
101 |
+
"execution_count": 106,
|
102 |
+
"id": "2d6c1215",
|
103 |
+
"metadata": {},
|
104 |
+
"outputs": [],
|
105 |
+
"source": [
|
106 |
+
"url = \"https://www.amazon.com/Instant-Pot-Multi-Use-Programmable-Pressure/dp/B00FLYWNYQ?_encoding=UTF8&content-id=amzn1.sym.2f889ce0-246f-467a-a086-d9a721167240&dib=eyJ2IjoiMSJ9.2EzBddTDEktLY8ckTsraM_cZ6pzKuNkA6y_gLR0-Uz1ekttQU6tuQEcjb8PThy0PfhvxLqeYWh3N7pQrGgRxAWzapVklC_aU6xBzD-3Wwqx3qyQRHsmOhPRsSpeCOIIZqS3SKDowZEPYrGnCbRMt5vxnsYMW-fD-zBbgeoeGYmbsN2U6_HNhLjrpePKCbQPmnZBJ9UhgYE4fE3DVuYm8xlJe9l5GixDLVFtZUq4m5FE.Ol-jiuu9P6mQie0yXLJj-Ht5-TXmIXuRPije85p_YVo&dib_tag=se&keywords=cooker&pd_rd_r=2cede598-f3ae-49ca-8a46-e5945a9c2631&pd_rd_w=2HLSC&pd_rd_wg=ZyUUn&qid=1749508157&sr=8-3\"\n",
|
107 |
+
"schema = ProductModel # pydantic class\n",
|
108 |
+
"\n",
|
109 |
+
"# read html file \n",
|
110 |
+
"# with open(r'C:\\Users\\abdfa\\Desktop\\UNI STUFFING\\GRADUATION PROJECT\\Group Work\\MCP_WEB2JSON\\0000.htm', 'r', encoding='utf-8') as file:\n",
|
111 |
+
"# content = file.read()\n",
|
112 |
+
"\n",
|
113 |
+
"# with open(r'C:\\Users\\abdfa\\Desktop\\UNI STUFFING\\GRADUATION PROJECT\\Group Work\\MCP_WEB2JSON\\Amazon.com_ Instant Pot Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer & Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart.htm', 'r', encoding='utf-8') as file:\n",
|
114 |
+
"# content = file.read()\n"
|
115 |
+
]
|
116 |
+
},
|
117 |
+
{
|
118 |
+
"cell_type": "code",
|
119 |
+
"execution_count": 107,
|
120 |
"id": "3bc223d0",
|
121 |
"metadata": {},
|
122 |
"outputs": [],
|
|
|
134 |
},
|
135 |
{
|
136 |
"cell_type": "code",
|
137 |
+
"execution_count": 108,
|
138 |
"id": "475fccd2",
|
139 |
"metadata": {},
|
140 |
"outputs": [],
|
141 |
"source": [
|
142 |
+
"classification_prompt_template = schema.model_json_schema()"
|
|
|
|
|
|
|
|
|
|
|
|
|
143 |
]
|
144 |
},
|
145 |
{
|
146 |
"cell_type": "code",
|
147 |
+
"execution_count": 109,
|
148 |
"id": "974417de",
|
149 |
"metadata": {},
|
150 |
"outputs": [],
|
|
|
202 |
},
|
203 |
{
|
204 |
"cell_type": "code",
|
205 |
+
"execution_count": 110,
|
206 |
"id": "58436d65",
|
207 |
"metadata": {},
|
208 |
"outputs": [],
|
|
|
216 |
},
|
217 |
{
|
218 |
"cell_type": "code",
|
219 |
+
"execution_count": 111,
|
220 |
"id": "c4e75e63",
|
221 |
"metadata": {},
|
222 |
"outputs": [],
|
|
|
274 |
},
|
275 |
{
|
276 |
"cell_type": "code",
|
277 |
+
"execution_count": 112,
|
278 |
"id": "bb4edecf",
|
279 |
"metadata": {},
|
280 |
"outputs": [
|
|
|
284 |
"4"
|
285 |
]
|
286 |
},
|
287 |
+
"execution_count": 112,
|
288 |
"metadata": {},
|
289 |
"output_type": "execute_result"
|
290 |
}
|
|
|
295 |
},
|
296 |
{
|
297 |
"cell_type": "code",
|
298 |
+
"execution_count": 113,
|
299 |
"id": "9927a78e",
|
300 |
"metadata": {},
|
301 |
+
"outputs": [
|
302 |
+
{
|
303 |
+
"name": "stdout",
|
304 |
+
"output_type": "stream",
|
305 |
+
"text": [
|
306 |
+
"raw scores [-11.34375 -13.8984375 -14.7578125 -14.7578125]\n",
|
307 |
+
"Sigmoid scores: [1.18431390e-05 9.20417541e-07 3.89729515e-07 3.89729515e-07]\n",
|
308 |
+
"Normalized scores: [1. 0.0463345 0. 0. ]\n",
|
309 |
+
"Filtered pairs:\n",
|
310 |
+
"[(Document(metadata={'relevance_score': -11.34375}, page_content='\\n <div class=\"product-card\">\\n <h2 class=\"product-title\">Wireless Noise Cancelling Headphones</h2>\\n <p class=\"product-description\">Experience immersive sound with active noise cancellation and long battery life.</p>\\n <span class=\"price\">$299.99</span>\\n <button>Add to Cart</button>\\n </div>\\n '), 1.0)]\n"
|
311 |
+
]
|
312 |
+
}
|
313 |
+
],
|
314 |
"source": [
|
315 |
"output = reranker.rerank(query=classification_prompt_template,passages=html_chunks)"
|
316 |
]
|
317 |
},
|
318 |
{
|
319 |
"cell_type": "code",
|
320 |
+
"execution_count": 114,
|
321 |
"id": "b77015f3",
|
322 |
"metadata": {},
|
323 |
"outputs": [
|
|
|
325 |
"name": "stdout",
|
326 |
"output_type": "stream",
|
327 |
"text": [
|
328 |
+
"\n",
|
329 |
" <div class=\"product-card\">\n",
|
330 |
" <h2 class=\"product-title\">Wireless Noise Cancelling Headphones</h2>\n",
|
331 |
" <p class=\"product-description\">Experience immersive sound with active noise cancellation and long battery life.</p>\n",
|
332 |
" <span class=\"price\">$299.99</span>\n",
|
333 |
" <button>Add to Cart</button>\n",
|
334 |
" </div>\n",
|
335 |
+
" \n",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
336 |
"--------------------------------------------------------------------------------\n"
|
337 |
]
|
338 |
}
|
|
|
345 |
},
|
346 |
{
|
347 |
"cell_type": "code",
|
348 |
+
"execution_count": 115,
|
349 |
"id": "bb3fa1b0",
|
350 |
"metadata": {},
|
351 |
"outputs": [
|
352 |
{
|
353 |
"data": {
|
354 |
"text/plain": [
|
355 |
+
"1"
|
356 |
]
|
357 |
},
|
358 |
+
"execution_count": 115,
|
359 |
"metadata": {},
|
360 |
"output_type": "execute_result"
|
361 |
}
|
|
|
366 |
},
|
367 |
{
|
368 |
"cell_type": "code",
|
369 |
+
"execution_count": 116,
|
370 |
"id": "c1c43f7c",
|
371 |
"metadata": {},
|
372 |
"outputs": [],
|
|
|
376 |
},
|
377 |
{
|
378 |
"cell_type": "code",
|
379 |
+
"execution_count": 117,
|
380 |
"id": "9c78eec9",
|
381 |
"metadata": {},
|
382 |
"outputs": [],
|
|
|
386 |
},
|
387 |
{
|
388 |
"cell_type": "code",
|
389 |
+
"execution_count": 118,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
390 |
"id": "92a5fc23",
|
391 |
"metadata": {},
|
392 |
"outputs": [],
|
|
|
398 |
},
|
399 |
{
|
400 |
"cell_type": "code",
|
401 |
+
"execution_count": 119,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
402 |
"id": "f07e1aca",
|
403 |
"metadata": {},
|
404 |
"outputs": [],
|
|
|
426 |
},
|
427 |
{
|
428 |
"cell_type": "code",
|
429 |
+
"execution_count": 120,
|
430 |
"id": "79cf2321",
|
431 |
"metadata": {},
|
432 |
"outputs": [
|
|
|
435 |
"output_type": "stream",
|
436 |
"text": [
|
437 |
"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
|
438 |
+
"raw scores [-12.40625 -12.453125 -12.46875 -12.5546875 -12.6875 ]\n",
|
439 |
+
"Sigmoid scores: [4.09291105e-06 3.90548374e-06 3.84493506e-06 3.52831186e-06\n",
|
440 |
+
" 3.08949445e-06]\n",
|
441 |
+
"Normalized scores: [1. 0.81321087 0.75286836 0.43732325 0. ]\n",
|
442 |
+
"Filtered pairs:\n",
|
443 |
+
"[(Document(metadata={'relevance_score': -12.40625}, page_content='<html><body><div><div><div><div><div><div>\\n<div> <h1> About this item </h1> <ul> <li><span> 7-IN-1 FUNCTIONALITY: Pressure cook, slow cook, rice cooker, yogurt maker, steamer, sauté pan and food warmer. </span></li> <li><span> QUICK ONE-TOUCH COOKING: 13 customizable Smart Programs for pressure cooking ribs, soups, beans, rice, poultry, yogurt, desserts and more. </span></li> <li><span> COOK FAST OR SLOW: Pressure cook delicious one-pot meals up to 70% faster than traditional cooking methods or slow cook your favorite traditional recipes – just like grandma used to make. </span></li> <li><span> QUICK AND EASY CLEAN UP: Finger-print resistant, stainless-steel sides and dishwasher-safe lid, inner pot, and accessories. </span></li> <li><span> SAFETY FEATURES: Includes over 10 safety features, plus overheat protection and safe-locking lid </span></li> <li><span> GREAT FOR GROWING FAMILIES: Cook for up to 6 people – perfect for growing families, or meal prepping and batch cooking for singles. </span></li> <li><span> VERSATILE INNER COOKING POT: We use food-grade stainless-steel, a tri-ply bottom for more even cooking and perfect for sautéing </span></li> <li><span> DISCOVER AMAZING RECIPES: Includes the free Instant Brands Connect App, where you can find new recipes to create quick favorites and prepare delicious meals, available for iOS and Android. 
</span></li> </ul>\\n<div> <span>›</span> See more product details </div> <div><div> \\xa0 Report an issue with this product or seller</div></div><h4>There is a newer model of this item:</h4><div><div> <div> Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart <span>$94.95</span> (2,377) <span>In Stock</span> </div> </div></div><div><div><h4>Product voltage: 120</h4><div> <span> <p><span>This product is compatible with outlets that support 120 volts and might require a converter when used outside of the United States.</span></p> </span> </div></div></div><div><div><div>\\n<span> <span><span><input/><span> <span>More filters</span> </span></span></span> </span> </div></div></div><div><div><div><div><div><div><div><div><div> <h4>\\n Price \\n </h4>\\n<div> <div> <span>\\n<span> <span><span><input/><span> <span><$25</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span> <span>$25 - $50</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span> <span>$50 - $100</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span> <span>$100 - $200</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span> <span>>$200</span> </span></span></span> </span> </span>\\n</div> </div> </div></div></div></div></div></div></div></div></div></div> </div><h2>\\n Discover similar items</h2></div></div></div></div></div></body></html>'), 1.0), (Document(metadata={'relevance_score': -12.453125}, page_content='<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div> <h4>\\n Color \\n \\n Capacity \\n \\n Grade \\n \\n Power \\n \\n Heat Source \\n \\n Material \\n \\n Slow Cooker Type \\n \\n Brand \\n \\n Closure \\n \\n Width \\n \\n Finish \\n \\n Control Type \\n \\n Free From \\n \\n Heating Elements \\n \\n 
Heating Type \\n \\n Depth \\n \\n Premium Brands \\n \\n Output Wattage \\n \\n Features \\n \\n Uses \\n \\n Inclusions \\n \\n Height \\n \\n Length \\n \\n Lid Material \\n \\n Shape \\n \\n Top Brands in Home & Kitchen \\n \\n Style \\n </h4>\\n<div> <div> <span>\\n<span> <span><span><input/><span>\\n<span>Black</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Grey</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>White</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Brown</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Beige</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Red</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Pink</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Orange</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Yellow</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Ivory</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Green</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Blue</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Purple</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Gold</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Silver</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Multi</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Clear</span> 
</span></span></span> </span> </span>\\n<span>\\n<span> <span><span><span>\\n<span>Stainless Steel</span> </span></span></span> </span> </span>\\n</div> </div> <span> <button>\\n<span>Clear Filters</span> </button>\\n</span></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>'), 0.8132108732268714), (Document(metadata={'relevance_score': -12.46875}, page_content=\"<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\\n<input/>\\n<input/>\\n<input/>\\n<div> <div> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> <span> \\n ()\\n </span> <span> Includes selected options. </span> <span> Includes initial monthly payment and selected options. </span> <span> <span> <span>\\n Details </span>\\n</span> <div> <div> <div> <div> <div><div> <div> <span>Price</span> <span> <span> <span> (</span><span>$</span><span>79<span>.</span></span><span>99</span><span>x)</span> </span> </span> </div> <div> <span> <span> <span>$</span><span>79<span>.</span></span><span>99</span> </span> </span> </div> </div></div> </div> <div> <div><div> <div> <span>Subtotal</span> </div> <div> <span> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> </span> </div> </div></div> <div><div> <div> <span>Subtotal</span> </div> </div></div> <div> <div> <span>Initial payment breakdown</span> </div> </div> <div> <span>Shipping cost, delivery date, and order total (including tax) shown at checkout.</span>\\n</div> </div> </div> </div> </div> </span> </div> </div> <div> <span> <span><span><input/><span>Add to Cart</span></span></span> </span> </div><div> <div> <span><span><input/><span> Buy Now </span></span></span> </div> </div><div> <div><div><div> <span> Enhancements you chose aren't available for this seller. 
</span> <span> Details </span> <div> <div> <div> <div> <span> To add the following enhancements to your purchase, choose a different seller. </span> </div> <div> <span>%cardName%</span> </div> </div> </div> </div> </div></div></div> <div><div><div> <span> ${cardName} not available for the seller you chose </span> </div></div></div> <div><div><div> <span> ${cardName} unavailable for quantities greater than ${maxQuantity}. </span> </div></div></div> </div></div></div></div></form></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>\"), 0.7528683592125947)]\n",
|
444 |
+
"prompt: \n",
|
445 |
+
"You are a helpful assistant that extracts structured data from web pages.\n",
|
446 |
+
"You will be given a web page and you need to extract the following information:\n",
|
447 |
+
"<html><body><div><div><div><div><div><div>\n",
|
448 |
+
"<div> <h1> About this item </h1> <ul> <li><span> 7-IN-1 FUNCTIONALITY: Pressure cook, slow cook, rice cooker, yogurt maker, steamer, sauté pan and food warmer. </span></li> <li><span> QUICK ONE-TOUCH COOKING: 13 customizable Smart Programs for pressure cooking ribs, soups, beans, rice, poultry, yogurt, desserts and more. </span></li> <li><span> COOK FAST OR SLOW: Pressure cook delicious one-pot meals up to 70% faster than traditional cooking methods or slow cook your favorite traditional recipes – just like grandma used to make. </span></li> <li><span> QUICK AND EASY CLEAN UP: Finger-print resistant, stainless-steel sides and dishwasher-safe lid, inner pot, and accessories. </span></li> <li><span> SAFETY FEATURES: Includes over 10 safety features, plus overheat protection and safe-locking lid </span></li> <li><span> GREAT FOR GROWING FAMILIES: Cook for up to 6 people – perfect for growing families, or meal prepping and batch cooking for singles. </span></li> <li><span> VERSATILE INNER COOKING POT: We use food-grade stainless-steel, a tri-ply bottom for more even cooking and perfect for sautéing </span></li> <li><span> DISCOVER AMAZING RECIPES: Includes the free Instant Brands Connect App, where you can find new recipes to create quick favorites and prepare delicious meals, available for iOS and Android. </span></li> </ul>\n",
|
449 |
+
"<div> <span>›</span> See more product details </div> <div><div> Report an issue with this product or seller</div></div><h4>There is a newer model of this item:</h4><div><div> <div> Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart <span>$94.95</span> (2,377) <span>In Stock</span> </div> </div></div><div><div><h4>Product voltage: 120</h4><div> <span> <p><span>This product is compatible with outlets that support 120 volts and might require a converter when used outside of the United States.</span></p> </span> </div></div></div><div><div><div>\n",
|
450 |
+
"<span> <span><span><input/><span> <span>More filters</span> </span></span></span> </span> </div></div></div><div><div><div><div><div><div><div><div><div> <h4>\n",
|
451 |
+
" Price \n",
|
452 |
+
" </h4>\n",
|
453 |
+
"<div> <div> <span>\n",
|
454 |
+
"<span> <span><span><input/><span> <span><$25</span> </span></span></span> </span> </span>\n",
|
455 |
+
"<span>\n",
|
456 |
+
"<span> <span><span><input/><span> <span>$25 - $50</span> </span></span></span> </span> </span>\n",
|
457 |
+
"<span>\n",
|
458 |
+
"<span> <span><span><input/><span> <span>$50 - $100</span> </span></span></span> </span> </span>\n",
|
459 |
+
"<span>\n",
|
460 |
+
"<span> <span><span><input/><span> <span>$100 - $200</span> </span></span></span> </span> </span>\n",
|
461 |
+
"<span>\n",
|
462 |
+
"<span> <span><span><input/><span> <span>>$200</span> </span></span></span> </span> </span>\n",
|
463 |
+
"</div> </div> </div></div></div></div></div></div></div></div></div></div> </div><h2>\n",
|
464 |
+
" Discover similar items</h2></div></div></div></div></div></body></html>\n",
|
465 |
+
"\n",
|
466 |
+
"<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div> <h4>\n",
|
467 |
+
" Color \n",
|
468 |
+
" \n",
|
469 |
+
" Capacity \n",
|
470 |
+
" \n",
|
471 |
+
" Grade \n",
|
472 |
+
" \n",
|
473 |
+
" Power \n",
|
474 |
+
" \n",
|
475 |
+
" Heat Source \n",
|
476 |
+
" \n",
|
477 |
+
" Material \n",
|
478 |
+
" \n",
|
479 |
+
" Slow Cooker Type \n",
|
480 |
+
" \n",
|
481 |
+
" Brand \n",
|
482 |
+
" \n",
|
483 |
+
" Closure \n",
|
484 |
+
" \n",
|
485 |
+
" Width \n",
|
486 |
+
" \n",
|
487 |
+
" Finish \n",
|
488 |
+
" \n",
|
489 |
+
" Control Type \n",
|
490 |
+
" \n",
|
491 |
+
" Free From \n",
|
492 |
+
" \n",
|
493 |
+
" Heating Elements \n",
|
494 |
+
" \n",
|
495 |
+
" Heating Type \n",
|
496 |
+
" \n",
|
497 |
+
" Depth \n",
|
498 |
+
" \n",
|
499 |
+
" Premium Brands \n",
|
500 |
+
" \n",
|
501 |
+
" Output Wattage \n",
|
502 |
+
" \n",
|
503 |
+
" Features \n",
|
504 |
+
" \n",
|
505 |
+
" Uses \n",
|
506 |
+
" \n",
|
507 |
+
" Inclusions \n",
|
508 |
+
" \n",
|
509 |
+
" Height \n",
|
510 |
+
" \n",
|
511 |
+
" Length \n",
|
512 |
+
" \n",
|
513 |
+
" Lid Material \n",
|
514 |
+
" \n",
|
515 |
+
" Shape \n",
|
516 |
+
" \n",
|
517 |
+
" Top Brands in Home & Kitchen \n",
|
518 |
+
" \n",
|
519 |
+
" Style \n",
|
520 |
+
" </h4>\n",
|
521 |
+
"<div> <div> <span>\n",
|
522 |
+
"<span> <span><span><input/><span>\n",
|
523 |
+
"<span>Black</span> </span></span></span> </span> </span>\n",
|
524 |
+
"<span>\n",
|
525 |
+
"<span> <span><span><input/><span>\n",
|
526 |
+
"<span>Grey</span> </span></span></span> </span> </span>\n",
|
527 |
+
"<span>\n",
|
528 |
+
"<span> <span><span><input/><span>\n",
|
529 |
+
"<span>White</span> </span></span></span> </span> </span>\n",
|
530 |
+
"<span>\n",
|
531 |
+
"<span> <span><span><input/><span>\n",
|
532 |
+
"<span>Brown</span> </span></span></span> </span> </span>\n",
|
533 |
+
"<span>\n",
|
534 |
+
"<span> <span><span><input/><span>\n",
|
535 |
+
"<span>Beige</span> </span></span></span> </span> </span>\n",
|
536 |
+
"<span>\n",
|
537 |
+
"<span> <span><span><input/><span>\n",
|
538 |
+
"<span>Red</span> </span></span></span> </span> </span>\n",
|
539 |
+
"<span>\n",
|
540 |
+
"<span> <span><span><input/><span>\n",
|
541 |
+
"<span>Pink</span> </span></span></span> </span> </span>\n",
|
542 |
+
"<span>\n",
|
543 |
+
"<span> <span><span><input/><span>\n",
|
544 |
+
"<span>Orange</span> </span></span></span> </span> </span>\n",
|
545 |
+
"<span>\n",
|
546 |
+
"<span> <span><span><input/><span>\n",
|
547 |
+
"<span>Yellow</span> </span></span></span> </span> </span>\n",
|
548 |
+
"<span>\n",
|
549 |
+
"<span> <span><span><input/><span>\n",
|
550 |
+
"<span>Ivory</span> </span></span></span> </span> </span>\n",
|
551 |
+
"<span>\n",
|
552 |
+
"<span> <span><span><input/><span>\n",
|
553 |
+
"<span>Green</span> </span></span></span> </span> </span>\n",
|
554 |
+
"<span>\n",
|
555 |
+
"<span> <span><span><input/><span>\n",
|
556 |
+
"<span>Blue</span> </span></span></span> </span> </span>\n",
|
557 |
+
"<span>\n",
|
558 |
+
"<span> <span><span><input/><span>\n",
|
559 |
+
"<span>Purple</span> </span></span></span> </span> </span>\n",
|
560 |
+
"<span>\n",
|
561 |
+
"<span> <span><span><input/><span>\n",
|
562 |
+
"<span>Gold</span> </span></span></span> </span> </span>\n",
|
563 |
+
"<span>\n",
|
564 |
+
"<span> <span><span><input/><span>\n",
|
565 |
+
"<span>Silver</span> </span></span></span> </span> </span>\n",
|
566 |
+
"<span>\n",
|
567 |
+
"<span> <span><span><input/><span>\n",
|
568 |
+
"<span>Multi</span> </span></span></span> </span> </span>\n",
|
569 |
+
"<span>\n",
|
570 |
+
"<span> <span><span><input/><span>\n",
|
571 |
+
"<span>Clear</span> </span></span></span> </span> </span>\n",
|
572 |
+
"<span>\n",
|
573 |
+
"<span> <span><span><span>\n",
|
574 |
+
"<span>Stainless Steel</span> </span></span></span> </span> </span>\n",
|
575 |
+
"</div> </div> <span> <button>\n",
|
576 |
+
"<span>Clear Filters</span> </button>\n",
|
577 |
+
"</span></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>\n",
|
578 |
+
"\n",
|
579 |
+
"<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\n",
|
580 |
+
"<input/>\n",
|
581 |
+
"<input/>\n",
|
582 |
+
"<input/>\n",
|
583 |
+
"<div> <div> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> <span> \n",
|
584 |
+
" ()\n",
|
585 |
+
" </span> <span> Includes selected options. </span> <span> Includes initial monthly payment and selected options. </span> <span> <span> <span>\n",
|
586 |
+
" Details </span>\n",
|
587 |
+
"</span> <div> <div> <div> <div> <div><div> <div> <span>Price</span> <span> <span> <span> (</span><span>$</span><span>79<span>.</span></span><span>99</span><span>x)</span> </span> </span> </div> <div> <span> <span> <span>$</span><span>79<span>.</span></span><span>99</span> </span> </span> </div> </div></div> </div> <div> <div><div> <div> <span>Subtotal</span> </div> <div> <span> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> </span> </div> </div></div> <div><div> <div> <span>Subtotal</span> </div> </div></div> <div> <div> <span>Initial payment breakdown</span> </div> </div> <div> <span>Shipping cost, delivery date, and order total (including tax) shown at checkout.</span>\n",
|
588 |
+
"</div> </div> </div> </div> </div> </span> </div> </div> <div> <span> <span><span><input/><span>Add to Cart</span></span></span> </span> </div><div> <div> <span><span><input/><span> Buy Now </span></span></span> </div> </div><div> <div><div><div> <span> Enhancements you chose aren't available for this seller. </span> <span> Details </span> <div> <div> <div> <div> <span> To add the following enhancements to your purchase, choose a different seller. </span> </div> <div> <span>%cardName%</span> </div> </div> </div> </div> </div></div></div> <div><div><div> <span> ${cardName} not available for the seller you chose </span> </div></div></div> <div><div><div> <span> ${cardName} unavailable for quantities greater than ${maxQuantity}. </span> </div></div></div> </div></div></div></div></form></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>\n",
|
589 |
+
"\n",
|
590 |
+
"schema: {'properties': {'productTitle': {'description': 'The full title of the product', 'maxLength': 200, 'minLength': 1, 'title': 'Product Title', 'type': 'string'}, 'price': {'anyOf': [{'exclusiveMinimum': 0.0, 'type': 'number'}, {'type': 'string'}], 'description': 'Unit price (must be > 0, two decimal places).', 'title': 'Product Price'}, 'manufacturer': {'description': 'Name of the product manufacturer.', 'maxLength': 1000, 'minLength': 1, 'title': 'Manufacturer', 'type': 'string'}}, 'required': ['productTitle', 'price', 'manufacturer'], 'title': 'ProductModel', 'type': 'object'}\n",
|
591 |
+
"Please provide the extracted data in JSON format.\n",
|
592 |
+
"WITH ONLY THE FIELDS THAT ARE IN THE SCHEMA.\n",
|
593 |
+
"\n",
|
594 |
"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
|
595 |
+
"Final output: {'productTitle': 'Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart', 'price': 94.95, 'manufacturer': 'Instant Pot'}\n",
|
596 |
"++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
|
597 |
]
|
598 |
},
|
|
|
600 |
"data": {
|
601 |
"text/plain": [
|
602 |
"{'productTitle': 'Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart',\n",
|
603 |
+
" 'price': 94.95,\n",
|
604 |
+
" 'manufacturer': 'Instant Pot'}"
|
605 |
]
|
606 |
},
|
607 |
+
"execution_count": 120,
|
608 |
"metadata": {},
|
609 |
"output_type": "execute_result"
|
610 |
}
|
611 |
],
|
612 |
"source": [
|
613 |
+
"pipe.run(content=url,is_url=True, schema=schema, hf=True)"
|
614 |
]
|
615 |
},
|
616 |
{
|
617 |
"cell_type": "code",
|
618 |
"execution_count": null,
|
619 |
+
"id": "6c4109c9",
|
620 |
+
"metadata": {},
|
621 |
+
"outputs": [],
|
622 |
+
"source": []
|
623 |
+
},
|
624 |
+
{
|
625 |
+
"cell_type": "code",
|
626 |
+
"execution_count": null,
|
627 |
+
"id": "ef3a300c",
|
628 |
"metadata": {},
|
629 |
"outputs": [],
|
630 |
"source": []
|
|
|
646 |
"name": "python",
|
647 |
"nbconvert_exporter": "python",
|
648 |
"pygments_lexer": "ipython3",
|
649 |
+
"version": "3.11.8"
|
650 |
}
|
651 |
},
|
652 |
"nbformat": 4,
|
web2json/__pycache__/ai_extractor.cpython-311.pyc
CHANGED
Binary files a/web2json/__pycache__/ai_extractor.cpython-311.pyc and b/web2json/__pycache__/ai_extractor.cpython-311.pyc differ
|
|
web2json/__pycache__/pipeline.cpython-311.pyc
CHANGED
Binary files a/web2json/__pycache__/pipeline.cpython-311.pyc and b/web2json/__pycache__/pipeline.cpython-311.pyc differ
|
|
web2json/__pycache__/postprocessor.cpython-311.pyc
CHANGED
Binary files a/web2json/__pycache__/postprocessor.cpython-311.pyc and b/web2json/__pycache__/postprocessor.cpython-311.pyc differ
|
|
web2json/__pycache__/preprocessor.cpython-311.pyc
CHANGED
Binary files a/web2json/__pycache__/preprocessor.cpython-311.pyc and b/web2json/__pycache__/preprocessor.cpython-311.pyc differ
|
|
web2json/ai_extractor.py
CHANGED
@@ -232,6 +232,7 @@ class NvidiaLLMClient(LLMClient):
|
|
232 |
Returns:
|
233 |
str: The generated text from the NVIDIA API.
|
234 |
"""
|
|
|
235 |
response = self.client.chat.completions.create(
|
236 |
model=self.model_name,
|
237 |
messages=[{"role": "user", "content": prompt}],
|
@@ -286,50 +287,38 @@ class NvidiaRerankerClient(RerankerClient):
|
|
286 |
self.model_name = model_name
|
287 |
|
288 |
@retry_on_ratelimit(max_retries=6, base_delay=0.5, max_delay=5.0)
|
289 |
-
def rerank(self, query: str, passages: List[str], top_k: int = 3
|
290 |
# 1. Prepare and send documents for scoring
|
291 |
docs = [Document(page_content=p) for p in passages]
|
292 |
-
# print("Bonjour")
|
293 |
-
# print(type(docs),docs)
|
294 |
-
# print(type(query),query)
|
295 |
scored_docs = self.client.compress_documents(
|
296 |
query=str(query),
|
297 |
documents=docs
|
298 |
)
|
299 |
-
|
300 |
-
# 2. Extract raw scores
|
301 |
raw_scores = np.array([doc.metadata['relevance_score'] for doc in scored_docs], dtype=float)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
302 |
|
303 |
-
# 3. Softmax normalization
|
304 |
-
exp_scores = np.exp(raw_scores - np.max(raw_scores))
|
305 |
-
softmax_scores = exp_scores / exp_scores.sum()
|
306 |
|
307 |
-
# 4. (Optional) Min–Max rescale of the softmax outputs
|
308 |
-
min_val, max_val = raw_scores.min(), raw_scores.max()
|
309 |
-
if max_val > min_val:
|
310 |
-
minmax_scores = (raw_scores - min_val) / (max_val - min_val)
|
311 |
-
else:
|
312 |
-
# all scores equal → set them all to 1
|
313 |
-
minmax_scores = np.ones_like(raw_scores)
|
314 |
-
|
315 |
-
# 5. Attach new scores back to metadata
|
316 |
-
for doc, s, mm in zip(scored_docs, softmax_scores, minmax_scores):
|
317 |
-
doc.metadata['softmax_score'] = float(s)
|
318 |
-
doc.metadata['minmax_score'] = float(mm)
|
319 |
-
|
320 |
-
# 6. Sort and return top_k by softmax_score
|
321 |
-
# Sort by softmax_score descending
|
322 |
-
sorted_docs = sorted(
|
323 |
-
scored_docs,
|
324 |
-
key=lambda d: d.metadata['softmax_score'],
|
325 |
-
reverse=True
|
326 |
-
)
|
327 |
-
# print("Ayeeeee")
|
328 |
-
# print("Docs Value:",sorted_docs)
|
329 |
-
# Filter by threshold
|
330 |
-
filtered_docs = [doc for doc in sorted_docs if doc.metadata['minmax_score'] >= threshold]
|
331 |
-
# print("Final", filtered_docs)
|
332 |
-
return filtered_docs
|
333 |
|
334 |
|
335 |
# TODO: will I need it ?
|
@@ -353,32 +342,56 @@ class HFRerankerClient(LLMClient):
|
|
353 |
self.token_true_id = self.tokenizer.convert_tokens_to_ids("yes")
|
354 |
self.token_false_id = self.tokenizer.convert_tokens_to_ids("no")
|
355 |
|
356 |
-
def rerank(self, query: str, passages: List[str], top_k: int = 3) -> List[str]:
|
357 |
"""
|
358 |
-
Rerank passages based on relevance to query.
|
359 |
|
360 |
Args:
|
361 |
query (str): Query string.
|
362 |
passages (List[str]): List of passages.
|
363 |
top_k (int): Number of top passages to return.
|
|
|
364 |
|
365 |
Returns:
|
366 |
-
List[str]: Top-k most relevant passages.
|
367 |
"""
|
368 |
-
inputs = [
|
|
|
|
|
|
|
369 |
scores = []
|
370 |
|
371 |
with torch.no_grad():
|
372 |
for inp in inputs:
|
373 |
logits = self.model(**inp).logits
|
|
|
374 |
score = torch.softmax(logits, dim=1)[0, 1].item() # probability of relevance
|
375 |
-
scores.append(score)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
|
377 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
|
379 |
-
top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
|
380 |
-
print(f"top indices: {top_indices}")
|
381 |
-
return [passages[i] for i in top_indices]
|
382 |
|
383 |
@retry_on_ratelimit(max_retries=6, base_delay=0.5, max_delay=5.0)
|
384 |
def call_api(self, prompt: str) -> str:
|
@@ -457,6 +470,7 @@ class LLMClassifierExtractor(AIExtractor):
|
|
457 |
# print("Using Hugging Face reranker for classification.")
|
458 |
return self.reranker.rerank(query, passages, top_k=top_k)
|
459 |
response = self.reranker.rerank(query,passages)
|
|
|
460 |
# print("DONNNNE")
|
461 |
# NVIDIA reranker path
|
462 |
return response
|
@@ -476,7 +490,8 @@ class LLMClassifierExtractor(AIExtractor):
|
|
476 |
# print(f"Content successfully chunked: {chunks}")
|
477 |
classified_chunks = self.classify_chunks(chunks, hf=hf) # conditional reranker
|
478 |
# extracting the content
|
479 |
-
|
|
|
480 |
# print(f"Classified Chunks {len(classified_chunks)}")
|
481 |
# print(classified_chunks)
|
482 |
# print('='*80)
|
|
|
232 |
Returns:
|
233 |
str: The generated text from the NVIDIA API.
|
234 |
"""
|
235 |
+
print("prompt: ", prompt)
|
236 |
response = self.client.chat.completions.create(
|
237 |
model=self.model_name,
|
238 |
messages=[{"role": "user", "content": prompt}],
|
|
|
287 |
self.model_name = model_name
|
288 |
|
289 |
@retry_on_ratelimit(max_retries=6, base_delay=0.5, max_delay=5.0)
|
290 |
+
def rerank(self, query: str, passages: List[str], top_k: int = 3, threshold: float = 0.5) -> List[str]:
    """
    Score passages against the query with the reranker client and keep the relevant ones.

    Raw relevance scores are squashed with a sigmoid, min-max normalized
    across the batch, and every passage whose normalized score is strictly
    greater than ``threshold`` is returned, in the order the client
    returned the scored documents.

    Args:
        query (str): Query string (coerced with ``str()`` before the call).
        passages (List[str]): Candidate passages to score.
        top_k (int): Accepted for interface parity with the other reranker.
            It is NOT applied here: the result is bounded only by
            ``threshold``.
        threshold (float): Exclusive lower bound on the normalized score.

    Returns:
        List[str]: Text of the passages that passed the threshold; empty
        when there is nothing to score.
    """
    # Function-scope import: the module's import block is not visible from
    # this section, so the dependency is pulled in locally.
    import logging

    log = logging.getLogger(__name__)

    # Nothing to score: skip the remote call entirely.
    if not passages:
        return []

    # 1. Prepare and send documents for scoring
    docs = [Document(page_content=p) for p in passages]
    scored_docs = self.client.compress_documents(
        query=str(query),
        documents=docs
    )

    # 2. Extract raw scores and compute sigmoid probabilities
    raw_scores = np.array([doc.metadata['relevance_score'] for doc in scored_docs], dtype=float)
    if raw_scores.size == 0:
        # np.min / np.max raise ValueError on an empty array.
        return []
    log.debug("raw scores %s", raw_scores)
    p_scores = 1 / (1 + np.exp(-raw_scores))
    log.debug("Sigmoid scores: %s", p_scores)

    # 3. Min-max normalization
    min_score = np.min(p_scores)
    max_score = np.max(p_scores)
    if max_score == min_score:
        # All values are the same: normalize to 1 so they all pass.
        norm_scores = np.ones_like(p_scores)
    else:
        norm_scores = (p_scores - min_score) / (max_score - min_score)
    log.debug("Normalized scores: %s", norm_scores)

    # 4. Keep only the documents whose normalized score beats the threshold,
    #    preserving the order in which the client returned them.
    top_docs = [
        doc.page_content
        for doc, norm in zip(scored_docs, norm_scores)
        if norm > threshold
    ]
    log.debug("Kept %d of %d passages", len(top_docs), len(scored_docs))
    return top_docs
|
320 |
|
|
|
|
|
|
|
321 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
322 |
|
323 |
|
324 |
# TODO: will I need it ?
|
|
|
342 |
self.token_true_id = self.tokenizer.convert_tokens_to_ids("yes")
|
343 |
self.token_false_id = self.tokenizer.convert_tokens_to_ids("no")
|
344 |
|
345 |
+
def rerank(self, query: str, passages: List[str], top_k: int = 3, threshold: float = 0.5) -> List[str]:
    """
    Rerank passages based on relevance to query using min-max normalized scores.

    Each passage is scored on its own as ``"{query} [SEP] {passage}"``; the
    softmax probability at class index 1 is used as the relevance score.
    Scores are min-max normalized across the batch, passages at or below
    ``threshold`` are dropped, and the best ``top_k`` survivors are returned.

    Args:
        query (str): Query string.
        passages (List[str]): List of passages.
        top_k (int): Maximum number of passages to return.
        threshold (float): Exclusive lower bound on the normalized score.

    Returns:
        List[str]: At most ``top_k`` passages above threshold, best first;
        empty when ``passages`` is empty.
    """
    # Function-scope import: the module's import block is not visible from
    # this section, so the dependency is pulled in locally.
    import logging

    log = logging.getLogger(__name__)

    # min()/max() of an empty array raise ValueError, so bail out early.
    if not passages:
        return []

    scores = []
    with torch.no_grad():
        for p in passages:
            inp = self.tokenizer(
                f"{query} [SEP] {p}", return_tensors="pt", truncation=True, padding=True
            ).to(self.device)
            logits = self.model(**inp).logits
            # Probability of the "relevant" class (index 1).
            scores.append(torch.softmax(logits, dim=1)[0, 1].item())

    log.debug("Softmax Scores: %s", scores)

    # Min-max normalize the scores
    scores_np = np.array(scores)
    min_score = scores_np.min()
    max_score = scores_np.max()
    if max_score == min_score:
        # Every passage scored the same: treat them all as fully relevant.
        norm_scores = np.ones_like(scores_np)
    else:
        norm_scores = (scores_np - min_score) / (max_score - min_score)
    log.debug("Normalized Scores: %s", norm_scores)

    # Filter based on normalized threshold
    filtered = [(i, s) for i, s in enumerate(norm_scores) if s > threshold]
    log.debug("Filtered: %s", filtered)

    # Sort by normalized score descending (stable, so ties keep input order)
    filtered.sort(key=lambda pair: pair[1], reverse=True)

    # Select top_k passages. The limit was documented but never applied.
    return [passages[i] for i, _ in filtered[:max(top_k, 0)]]
|
394 |
|
|
|
|
|
|
|
395 |
|
396 |
@retry_on_ratelimit(max_retries=6, base_delay=0.5, max_delay=5.0)
|
397 |
def call_api(self, prompt: str) -> str:
|
|
|
470 |
# print("Using Hugging Face reranker for classification.")
|
471 |
return self.reranker.rerank(query, passages, top_k=top_k)
|
472 |
response = self.reranker.rerank(query,passages)
|
473 |
+
print(f"response: {response}")
|
474 |
# print("DONNNNE")
|
475 |
# NVIDIA reranker path
|
476 |
return response
|
|
|
490 |
# print(f"Content successfully chunked: {chunks}")
|
491 |
classified_chunks = self.classify_chunks(chunks, hf=hf) # conditional reranker
|
492 |
# extracting the content
|
493 |
+
|
494 |
+
# classified_chunks = [chunk.page_content for chunk in classified_chunks]
|
495 |
# print(f"Classified Chunks {len(classified_chunks)}")
|
496 |
# print(classified_chunks)
|
497 |
# print('='*80)
|