abdo-Mansour commited on
Commit
d053923
·
1 Parent(s): e202963

did some refactoring

Browse files
test.ipynb CHANGED
@@ -5,16 +5,7 @@
5
  "execution_count": 1,
6
  "id": "5223b1b7",
7
  "metadata": {},
8
- "outputs": [
9
- {
10
- "name": "stdout",
11
- "output_type": "stream",
12
- "text": [
13
- "WARNING:tensorflow:From c:\\Users\\Omar\\AppData\\Local\\Programs\\Python\\Python311\\Lib\\site-packages\\tf_keras\\src\\losses.py:2976: The name tf.losses.sparse_softmax_cross_entropy is deprecated. Please use tf.compat.v1.losses.sparse_softmax_cross_entropy instead.\n",
14
- "\n"
15
- ]
16
- }
17
- ],
18
  "source": [
19
  "from web2json.preprocessor import *\n",
20
  "from web2json.ai_extractor import *\n",
@@ -49,20 +40,11 @@
49
  "execution_count": 3,
50
  "id": "9e6b0eb9",
51
  "metadata": {},
52
- "outputs": [
53
- {
54
- "name": "stderr",
55
- "output_type": "stream",
56
- "text": [
57
- "Some weights of Qwen3ForSequenceClassification were not initialized from the model checkpoint at Qwen/Qwen3-Reranker-0.6B and are newly initialized: ['score.weight']\n",
58
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n"
59
- ]
60
- }
61
- ],
62
  "source": [
63
  "llm = NvidiaLLMClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'qwen/qwen2.5-7b-instruct'})\n",
64
- "# reranker = NvidiaRerankerClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'nv-rerank-qa-mistral-4b:1'})\n",
65
- "reranker = HFRerankerClient()"
66
  ]
67
  },
68
  {
@@ -171,6 +153,183 @@
171
  "post = PostProcessor()"
172
  ]
173
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
174
  {
175
  "cell_type": "code",
176
  "execution_count": 8,
@@ -252,7 +411,7 @@
252
  },
253
  {
254
  "cell_type": "code",
255
- "execution_count": 13,
256
  "id": "f07e1aca",
257
  "metadata": {},
258
  "outputs": [],
@@ -280,7 +439,7 @@
280
  },
281
  {
282
  "cell_type": "code",
283
- "execution_count": 15,
284
  "id": "79cf2321",
285
  "metadata": {},
286
  "outputs": [
@@ -289,31 +448,29 @@
289
  "output_type": "stream",
290
  "text": [
291
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
292
- "Content successfully chunked into 11.\n",
293
- "Content successfully chunked: [\"<html><head>\\n<link/>\\n<link/>\\n<link/>\\n<meta/><title>Amazon.com: Instant Pot Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer &amp; Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart</title>\\n</head><body><div><button>\\n Shortcuts menu\\n</button></div></body><body><div><nav>\\n<h2>Skip to</h2>\\n<ul>\\n<li>\\n Main content\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n Buying options\\n</li>\\n<li>\\n Compare with similar items\\n</li>\\n<li>\\n Videos\\n</li>\\n<li>\\n Reviews\\n</li>\\n</ul>\\n<h2>\\n Keyboard shortcuts\\n </h2>\\n<ul>\\n<li>\\nSearch\\nalt\\n+\\n/\\n</li>\\n<li>\\nCart\\nshift\\n+\\nalt\\n+\\nC\\n</li>\\n<li>\\nHome\\nshift\\n+\\nalt\\n+\\nH\\n</li>\\n<li>\\nOrders\\nshift\\n+\\nalt\\n+\\nO\\n</li>\\n<li>\\n<button>\\n<div>\\n<span>Add to cart</span>\\n<div>\\n<span>shift</span>\\n<span>+</span>\\n<span>alt</span>\\n<span>+</span>\\n<span>K</span>\\n</div>\\n</div>\\n</button>\\n</li>\\n<li>\\n<button>\\n<div>\\n<span>Open/close shortcuts menu</span>\\n<div>\\n<span>shift</span>\\n<span>+</span>\\n<span>alt</span>\\n<span>+</span>\\n<span>Z</span>\\n</div>\\n</div>\\n</button>\\n</li>\\n</ul>\\n<div>\\n<div>\\n<div>\\n<div>\\n<span>To move between items, use your keyboard's up or down arrows.</span>\\n</div>\\n</div>\\n</div>\\n</div>\\n</nav></div></body><body><div><div><div><div>\\n<div>\\n<div><div><div><ul><li><span>Home &amp; Kitchen</span></li><li><span>›</span></li><li><span>Kitchen &amp; Dining</span></li><li><span>›</span></li><li><span>Small Appliances</span></li><li><span>›</span></li><li><span>Rice Cookers</span></li></ul></div></div></div> </div>\\n</div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div> <div> <h5> <div>\\n<div> <span> Deal Price Regular Price </span> </div> 
</div>\\n</h5> <div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div> <div>\\n<div>\\n<span><span>$79.99</span><span><span>$</span><span>79<span>.</span></span><span>99</span></span></span> </div>\\n</div> </div> </div>\\n</div>\\n</div>\\n</div>\\n<div>\\n<div> <div> <div> <span> Ships from: </span> <span> Amazon.com </span> </div> </div> <div> <div> <span> Sold by: </span> <span> Amazon.com </span> </div> </div> </div> </div>\\n</div>\\n<div><form><input/></form></div><div><form><div><div><div>\\n<div>\\n<span> $235.34 Shipping &amp; Import Fees Deposit to Egypt </span> <span> Details </span> <div> <h3>Shipping &amp; Fee Details</h3>\\n<table> <tr> <td> <span> Price </span> </td> <td> <span> $99.95 </span> </td> </tr> <tr> <td> <span> AmazonGlobal Shipping </span> </td> <td> <span> $81.05 </span> </td> </tr> <tr> <td> <span> \\n Estimated Import Fees Deposit\\n</span> </td> <td> <span> $154.29 </span> </td> </tr> <tr> <td> <span>Total</span> </td> <td> <span> $335.29 </span> </td> </tr> </table> </div>\\n</div>\\n<div>\\n<div>\\n<div><div><div><span> Delivery <span>Sunday, July 13</span>. Order within <span>23 hrs 59 mins</span> </span></div></div></div> </div>\\n<div>\\n<span> \\nDeliver to\\xa0Egypt\\n </span> </div>\\n</div>\\n</div></div></div></form></div><div><form><div><div><div>\\n<div> <div> <span> In Stock </span> </div> </div> </div></div></div></form></div></div> <div>\\n<div>\\n<div> <div> <span> This deal is exclusively for Amazon Prime members. </span> </div>\\n<div> <div>\\n<span><span><input/><span> Join Prime </span></span></span> </div>\\n<div> <span>Cancel anytime</span> </div> </div> <div> <span> Already a member? 
</span> Sign in </div> </div> </div>\\n<div>\\n<div> <div> <div> <div>\\n<div>\\n<div>\\n<div>\\n<span>Ships from</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Ships from</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Sold by</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Sold by</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Returns</span> </div>\\n</div>\\n<div>\\n<span> 30-day refund/replacement </span> <div> <div> <div> <span>30-day refund/replacement</span> </div> <div> This item can be returned in its original condition for a full refund or replacement within 30 days of receipt. </div> <div> Read full return policy </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Payment</span> </div>\\n</div>\\n<div>\\n<span> Secure transaction </span> <div> <div> <div> <span>Your transaction is secure</span> </div> <div> We work hard to protect your security and privacy. Our payment security system encrypts your information during transmission. We don’t share your credit card details with third-party sellers, and we don’t sell your information to others. Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Support</span> </div>\\n</div>\\n<div>\\n<span> Product support included </span> <div> <div> <div> <span>What\\'s Product Support?</span> </div> <div> In the event your product doesn\\'t work as expected or you need help using it, Amazon offers free product support options such as live phone/chat with an Amazon associate, manufacturer contact information, step-by-step troubleshooting guides, and help videos. \\nBy solving product issues, we help the planet by extending the life of products. Availability of support options differ by product and country. 
Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Packaging</span> </div>\\n</div>\\n<div>\\n<span> Ships in product packaging </span> <div> <div> <div> <span>Ships in product packaging</span> </div> <div> <p>This item has been tested to certify it can ship safely in its original box or bag to avoid unnecessary packaging. Since 2015, we have reduced the weight of outbound packaging per shipment by 41% on average, that’s over 2 million tons of packaging material.</p><i>If you still require Amazon packaging for this item, choose \"Ship in Amazon packaging\" at checkout. </i> Learn more </div> </div> </div> </div> </div>\\n</div>\\n</div> <div> <div>See more</div> </div> </div> </div> </div>\\n</div> </div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\\n<div>\\n<div> <span> <div> <div>\\n<span><label>Quantity:</label><select> <option>1 </option> <option>2 </option> <option>3 </option> <option>4 </option> <option>5 </option> <option>6 </option> <option>7 </option> <option>8 </option> <option>9 </option> <option>10 </option> <option>11 </option> <option>12 </option> <option>13 </option> <option>14 </option> <option>15 </option> <option>16 </option> <option>17 </option> <option>18 </option> <option>19 </option> <option>20 </option> <option>21 </option> <option>22 </option> <option>23 </option> <option>24 </option> <option>25 </option> <option>26 </option> <option>27 </option> <option>28 </option> <option>29 </option> <option>30 </option> </select><span><span><span><span>Quantity:</span><span>1</span></span></span></span></span> </div> </div> <span><input/><span> Buy Now </span></span></span> <div><div> <span> Enhancements you chose aren't available for this seller. 
</span> <span> Details </span> <div> <div> <div> <div> <span> To add the following enhancements to your purchase, choose a different seller. </span> </div> <div> <span>%cardName%</span> </div> </div> </div> </div> </div></div></div> <span> <span><span><input/><span>Add to Cart</span></span></span> </span></div> <input/><div> <div> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> <span> \\n ()\\n </span> <span> Includes selected options. </span> <span> Includes initial monthly payment and selected options. </span> <span> <span> <span>\\n Details </span>\\n</span> <div> <div> <div> <div> <div><div> <div> <span>Price</span> <span> <span> <span> (</span><span>$</span><span>79<span>.</span></span><span>99</span><span>x)</span> </span> </span> </div> <div> <span> <span> <span>$</span><span>79<span>.</span></span><span>99</span> </span> </span> </div> </div></div> </div> <div> <div><div> <div> <span>Subtotal</span> </div> <div> <span> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> </span> </div> </div></div> <div><div> <div> <span>Subtotal</span> </div> </div></div> <div> <div> <span>Initial payment breakdown</span> </div> </div> <div> <span>Shipping cost, delivery date, and order total (including tax) shown at checkout.</span>\\n</div> </div> </div> </div> </div> </span> </div> </div></div></div></div></form></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\\n<div> <div> <div> <div>\\n<div>\\n<div>\\n<div>\\n<span>Ships from</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Ships from</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Sold 
by</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Sold by</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Returns</span> </div>\\n</div>\\n<div>\\n<span> 30-day refund/replacement </span> <div> <div> <div> <span>30-day refund/replacement</span> </div> <div> This item can be returned in its original condition for a full refund or replacement within 30 days of receipt. </div> <div> Read full return policy </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<span>Packaging</span> </div>\\n</div>\\n<div>\\n<span> Ships in product packaging </span> <div> <div> <div> <span>Ships in product packaging</span> </div> <div> <p>This item has been tested to certify it can ship safely in its original box or bag to avoid unnecessary packaging. Since 2015, we have reduced the weight of outbound packaging per shipment by 41% on average, that’s over 2 million tons of packaging material.</p><i>If you still require Amazon packaging for this item, choose \"Ship in Amazon packaging\" at checkout. </i> Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Payment</span> </div>\\n</div>\\n<div>\\n<span> Secure transaction </span> <div> <div> <div> <span>Your transaction is secure</span> </div> <div> We work hard to protect your security and privacy. Our payment security system encrypts your information during transmission. We don’t share your credit card details with third-party sellers, and we don’t sell your information to others. 
Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Support</span> </div>\\n</div>\\n<div>\\n<span> Product support included </span> <div> <div> <div> <span>What\\'s Product Support?</span> </div> <div> In the event your product doesn\\'t work as expected or you need help using it, Amazon offers free product support options such as live phone/chat with an Amazon associate, manufacturer contact information, step-by-step troubleshooting guides, and help videos. \\nBy solving product issues, we help the planet by extending the life of products. Availability of support options differ by product and country. Learn more </div> </div> </div> </div> </div>\\n</div>\\n</div>\\n</div>\\n</div> <div> <div>See more</div> </div> </div> <label><input/><span>Add a gift receipt for easy returns</span></label></div> </div></div></div></form> \\n</div><span>Instant Pot Duo</span></div></div></div>\\n</div>\\n</div>\\n</div> \\xa0 Report an issue with this product or seller<h4>Product voltage: 120</h4></div> <span> <span>8K+ bought</span><span> in past month</span> </span>Brief content visible, double tap to read full content.</div> Visit the Instant Pot Store \\n <ul> <li><span> 7-IN-1 FUNCTIONALITY: Pressure cook, slow cook, rice cooker, yogurt maker, steamer, sauté pan and food warmer. </span></li> <li><span> QUICK ONE-TOUCH COOKING: 13 customizable Smart Programs for pressure cooking ribs, soups, beans, rice, poultry, yogurt, desserts and more. </span></li> <li><span> COOK FAST OR SLOW: Pressure cook delicious one-pot meals up to 70% faster than traditional cooking methods or slow cook your favorite traditional recipes – just like grandma used to make. </span></li> <li><span> QUICK AND EASY CLEAN UP: Finger-print resistant, stainless-steel sides and dishwasher-safe lid, inner pot, and accessories. 
</span></li> <li><span> SAFETY FEATURES: Includes over 10 safety features, plus overheat protection and safe-locking lid </span></li> <li><span> GREAT FOR GROWING FAMILIES: Cook for up to 6 people – perfect for growing families, or meal prepping and batch cooking for singles. </span></li> <li><span> VERSATILE INNER COOKING POT: We use food-grade stainless-steel, a tri-ply bottom for more even cooking and perfect for sautéing </span></li> <li><span> DISCOVER AMAZING RECIPES: Includes the free Instant Brands Connect App, where you can find new recipes to create quick favorites and prepare delicious meals, available for iOS and Android. </span></li> </ul><div> <span>›</span> See more product details </div> <div><div> <div> Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, &amp; Warmer, Includes App With Over 800 Recipes, 6 Quart <span>$94.95</span> (2,374) <span>In Stock</span> </div> </div></div></div></div><h2>\\n Discover similar items</h2></div></div></div></div></div></body></html>', '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div> <h4>\\n Color \\n \\n Capacity \\n \\n Grade \\n \\n Power \\n \\n Heat Source \\n \\n Material \\n \\n Slow Cooker Type \\n \\n Brand \\n \\n Closure \\n \\n Width \\n \\n Finish \\n \\n Control Type \\n \\n Free From \\n \\n Heating Elements \\n \\n Heating Type \\n \\n Depth \\n \\n Premium Brands \\n \\n Output Wattage \\n \\n Features \\n \\n Uses \\n \\n Inclusions \\n \\n Height \\n \\n Length \\n \\n Lid Material \\n \\n Shape \\n \\n Top Brands in Home &amp; Kitchen \\n \\n Style \\n </h4>\\n<div> <div> <span>\\n<span> <span><span><input/><span>\\n<span>Black</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Grey</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>White</span> </span></span></span> </span> 
</span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Brown</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Beige</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Red</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Pink</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Orange</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Yellow</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Ivory</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Green</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Blue</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Purple</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Gold</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Silver</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Multi</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><input/><span>\\n<span>Clear</span> </span></span></span> </span> </span>\\n<span>\\n<span> <span><span><span>\\n<span>Stainless Steel</span> </span></span></span> </span> </span>\\n</div> </div> <span> <button>\\n<span>Clear Filters</span> </button>\\n</span></div></div></div></div></div></div>\\n</div>\\n<span> <span><span><span> <span>\\n<span>Black</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span>\\n<span>Grey</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Capacity: Up to 2.99 L</span>\\n</span>\\n</span></span></span> 
</span><span> <span><span><span> <span> <span>Capacity: 3 to 4.99 L</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Grade: Commercial Grade</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Power: Up to 1499 W</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Power: 1500 to 1599 W</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Heat Source: Gas</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Heat Source: Electric</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Material: Aluminum</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Material: Stainless Steel</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Slow Cooker Type: Manual</span>\\n</span>\\n</span></span></span> </span><span> <span><span><span> <span> <span>Slow Cooker Type: Programmable</span>\\n</span>\\n</span></span></span> </span></div></div>\\n</div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div>\\n<div><div> <hr/> <div> <div><div><h2>Deals on related products</h2> <div> <div> <span> Sponsored </span> </div> </div> </div><div><span><span>Page <span>1</span> of <span>1</span></span><span>Start over</span></span></div></div> <div> <div><div><div>Previous page of related Sponsored Products</div><div><div><ol> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CHEF iQ Smart Pressure Cooker with WiFi and Built-in Scale - Easy-to-Use 10-in-1 Mu... </div> <div> 2,645 </div> With Prime <div> -30%$139.98$139.98List Price:$199.99$199.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Hamilton Beach 3-in-1 Electric Egg Cooker for Hard Boiled Eggs, Poacher Eggs, Omele... 
</div> <div> 5,209 </div> <div> Amazon's\\xa0Choice </div> Limited time deal <div> -19%$16.98$16.98List:$20.95$20.95 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CUCKOO CRP-ST1009FW 10-Cup (Uncooked) / 20-Cup (Cooked) Twin Pressure Rice Cooker &amp;... </div> <div> 366 </div> With Prime <div> -31%$239.99$239.99List Price:$349.99$349.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Pizza Oven Indoor, Countertop Electric Pizza Maker 12-inch, 2-minute Pizza, 6 Prese... </div> <div> 12 </div> Limited time deal <div> -15%$169.99$169.99List:$199.99$199.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> WantJoin Pressure Cooker, 8 Quart Stainless Steel Pressure Canner, Induction Compat... </div> <div> 947 </div> <div> Amazon's\\xa0Choice </div> Limited time deal <div> -10%$80.89$80.89List:$89.99$89.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CUCKOO CR-0675FW 6-Cup (Uncooked) / 12-Cup (Cooked) Micom Rice Cooker with Nonstick... </div> <div> 3,644 </div> <div> Amazon's\\xa0Choice </div> With Prime <div> -27%$79.99$79.99List Price:$109.99$109.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CUCKOO CR-0633F | 6-Cup (Uncooked) Micom Rice Cooker | 11 Menu Options: White Rice,... 
</div> <div> 2,689 </div> With Prime <div> -31%$89.99$89.99List Price:$129.99$129.99 </div> </div> </li> </ol></div></div><div>Next page of related Sponsored Products</div></div></div> </div> </div>\\n<div><h2>\\n Product information </h2></div></div></div> </div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div> <div> <div> <table> <tr> <th> Brand </th> <td> Instant Pot </td> </tr> <tr> <th> Capacity </th> <td> 5.68 Liters </td> </tr> <tr> <th> Material </th> <td> Stainless steel </td> </tr> <tr> <th> Finish Type </th> <td> Stainless Steel </td> </tr> <tr> <th> Product Dimensions </th> <td> 12.2\"D x 13.38\"W x 12.48\"H </td> </tr> <tr> <th> Special Feature </th> <td> Programmable </td> </tr> <tr> <th> Wattage </th> <td> 1000 watts </td> </tr> <tr> <th> Item Weight </th> <td> 11.8 Pounds </td> </tr> <tr> <th> Control Method </th> <td> Touch </td> </tr> <tr> <th> Controller Type </th> <td> Push Button </td> </tr> <tr> <th> Operation Mode </th> <td> Automatic </td> </tr> <tr> <th> Is Dishwasher Safe </th> <td> Yes </td> </tr> <tr> <th> Voltage </th> <td> 120 Volts </td> </tr> <tr> <th> Closure Type </th> <td> Outer Lid, Inner Lid </td> </tr> <tr> <th> UPC </th> <td> 810028585201 </td> </tr> <tr> <th> Item Weight </th> <td> 11.8 pounds </td> </tr> <tr> <th> Manufacturer </th> <td> Instant Pot </td> </tr> <tr> <th> ASIN </th> <td> B00FLYWNYQ </td> </tr> <tr> <th> Country of Origin </th> <td> China </td> </tr> <tr> <th> Item model number </th> <td> 112-0170-01 </td> </tr> <tr> <th>Customer Reviews</th> <td> <div>\\n<span> <span>\\n<span> 4.6 4.6 out of 5 stars </span> </span>\\n</span> <span> 130,203 ratings </span>\\n</div>\\n 4.6 out of 5 stars </td> </tr> <tr> <th> Best Sellers Rank </th> <td> <span> <ul> <li><span><span>#27 in Kitchen &amp; Dining (See Top 100 in Kitchen &amp; Dining)</span></span></li> <li><span><span>#1 in Electric Pressure Cookers</span></span></li> <li><span><span>#2 in Rice 
Cookers</span></span></li> </ul> </span> </td> </tr> <tr> <th> Is Discontinued By Manufacturer </th> <td> No </td> </tr> <tr> <th> Date First Available </th> <td> December 2, 2013 </td> </tr> </table> </div> <h3> Warranty &amp; Support </h3><div> Other content Manual<span> [PDF ] </span>User Guide Manual<span> [PDF ] </span>Product Warranty: For warranty information about this product, please click here.<span> [PDF ] </span> </div></div> </div></div></div>Brief content visible, double tap to read full content. <div>\\nSafety Information (PDF)\\n</div><div>\\nUser Manual (PDF)\\n</div><div>\\nUser Guide (PDF)\\n</div><h2>Compare with similar items</h2></div> <div>\\n<p> <span>Easy to use, easy to clean, fast, versatile, and convenient, the Instant Pot® Duo™ is the one that started it all. It replaces 7 kitchen appliances: pressure cooker, slow cooker, rice cooker, steamer, sauté pan, yogurt maker &amp; warmer. With 13 built-in smart programs, cook your favorite dishes with the press of a button. The tri-ply, stainless steel inner pot offers quick, even heating performance. Redefine cooking and enjoy quick and easy meals anywhere, any time. 
The Instant Pot Duo offers the quality, convenience and versatility you’ve come to expect from Instant – discover amazing.</span> </p> </div></div> <div> <div> <div><div> <div> <span>Highly Rated</span> </div> <div> <span>100K+ customers rate items from this brand highly</span> </div> </div></div> </div> <div> <div><div> <div> <span>Trending</span> </div> <div> <span>100K+ orders for this brand in past 3 months</span> </div> </div></div> </div> <div> <div><div> <div> <span>Low Returns</span> </div> <div> <span>Customers usually keep items from this brand</span> </div> </div></div> </div> </div></div></div></div></div></div></div></body></html>', '<html><body><div><div><div><div><div><div><div><div><div><table><tr><td><div><div><span>This Item</span></div><span><span>Buying options</span></span></div><div><div><span>Instant Pot\\xa0</span><span>Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer &amp; Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart</span></div></div><span>Price</span><span>Delivery</span><span>Customer Ratings</span><span>Sold By</span><span>capacity</span><span>operation mode</span><span>control method</span><span>material</span><span>dishwasher safe</span></td><td><div><div><span>Recommendations</span></div></div><div>Instant Pot\\xa0Duo Crisp 11-in-1 Air Fryer and Electric Pressure Cooker Combo with Multicooker Lids that Air Fries, Steams, Slow Cooks, Sautés, Dehydrates, &amp; More, Free App With Over 800 Recipes, 6 Quart</div></td><td><div>carori\\xa0CARORI 9-in-1 Electric Pressure Cooker 6 Qt, Programmable Multi-Function Cooker with Safer Vent, Olla de Presion, Rice Cooker, Slow Cooker, Steamer, Sauté, Warmer &amp; Sterilizer, 1000W, Stainless Steel</div></td><td><div>Midea\\xa012-in-1 Electric Pressure Cooker, 8 Quarts, 12 Presets, Multi-Functional Programmable Slow Cooker, Rice Cooker, Steamer, Sauté Pan, Yogurt Maker, and More, Stainless 
Steel</div></td></tr></table><h2>Products related to this item</h2><div> <div> <span> Sponsored </span> </div> </div><div><h2>Similar brands on Amazon</h2></div><span>\\n<div>\\n<div><div><div><div><h2>Customer reviews</h2></div><div><div><div><i><span>4.6 out of 5 stars</span></i></div><div><div><span><span>4.6 out of 5</span></span></div></div></div></div><div><span>130,203 global ratings</span></div><div><div><div><ul><li><span>5 star4 star3 star2 star1 star5 star83%10%3%1%3%83%</span></li><li><span>5 star4 star3 star2 star1 star4 star83%10%3%1%3%10%</span></li><li><span>5 star4 star3 star2 star1 star3 star83%10%3%1%3%3%</span></li><li><span>5 star4 star3 star2 star1 star2 star83%10%3%1%3%1%</span></li><li><span>5 star4 star3 star2 star1 star1 star83%10%3%1%3%3%</span></li></ul></div></div></div><div><div><div><div>How customer reviews and ratings work<div><p>Customer Reviews, including Product Star Ratings help customers to learn more about the product and decide whether it is the right product for them.</p><p>To calculate the overall star rating and percentage breakdown by star, we don’t use a simple average. Instead, our system considers things like how recent a review is and if the reviewer bought the item on Amazon. It also analyzed reviews to verify trustworthiness.</p>Learn more how customers reviews work on Amazon</div></div></div></div></div></div></div></div>\\n</div></span><div><h3>Review this product</h3><div>Share your thoughts with other customers</div><div><span><span>Write a customer review</span></span></div></div><div><div><div><div><div><div><div><div><h3>Customers say</h3></div><p><span>Customers find the pressure cooker works well, particularly praising its sauté feature and accurate cooking times. They appreciate its ease of use, with one customer noting the intuitive controls, and consider it a great kitchen appliance that makes meal prep convenient. 
The appliance receives positive feedback for its cooking ability, with one customer highlighting its versatility in transforming into a pressure cooker, and customers find it easy to clean with a stainless steel pot that cleans well. Customers enjoy the complex flavors produced, though opinions on build quality are mixed, with some finding it well-made while others describe it as wimpy.</span></p><p><span>AI Generated from the text of customer reviews</span></p></div></div></div></div></div></div></div><div><div><div><div><div><div><div><div><div><div><div><div><h4>Select to learn more</h4></div><div>Works wellEase of useCook timeAppliance qualityCooking abilityEase of cleaningFlavorBuild quality</div></div></div></div></div></div></div></div></div></div></div></div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><span>8,039 customers mention \"Works well\"</span><span>6,939 positive</span><span>1,100 negative</span></div></div></div><div><div><p>Customers find that the pressure cooker works well, with the sauté feature performing particularly effectively.</p></div></div><div><div><p>\"...This <b>works with new potatoes</b>, and regular potatoes! Happy Instant Potting!\" Read more</p></div></div><div><div><p>\"...<b>It was excellent</b>. I did 6 minutes per pound + 2 minutes. 
I also cook chicken thighs for dinner about once a week, which I had never cooked before....\" Read more</p></div></div><div><div><p>\"...Most <b>programs work just fine on full automatic</b>, but some small exceptions may demand more online flexibility....\" Read more</p></div></div><div><div><p>\"...occasional mishaps, the Instant Pot Duo has consistently <b>delivered incredible results</b>....\" Read more</p></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div><hr/></div></div></div></div></div></div></body></html>', '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><span>7,651 customers mention \"Ease of use\"6,666 customers mention \"Cook time\"5,399 customers mention \"Appliance quality\"4,396 customers mention \"Cooking ability\"3,065 customers mention \"Ease of cleaning\"2,827 customers mention \"Flavor\"2,273 customers mention \"Build quality\"</span><span>6,651 positive</span><span>1,000 negative</span>Amazon Customer<i><span>5 out of 5 stars</span></i>MoreHide</div><h5>This has changed the way we eat. It\\'s easier to use than I thought it would be.</h5></div></div><div><div><p>Customers find the pressure cooker simple to use, with clear operating instructions in the booklet, making meal preparation a breeze.</p></div></div><div><div><p>\"...make in your Instant Pot that will change your life: <b>incredibly easy perfectly poached eggs</b> in 2-3 minutes, and baked potatoes in 12 minutes....\" Read more</p></div></div><div><div><p>\"...credit as most automatic settings work well, automating it for <b>ease of use</b> and safety. 
Cooking is part Science, but, I think, more Art than Science....\" Read more</p></div></div><div><div><p>\"...crockpot extensively over the past years and while I appreciate the <b>ease of use</b> and the ability to put a meal on the table soon after I got home in...\" Read more</p></div></div><div><div><p>\"...of pressure cookers anymore, the time , energy bills saved n <b>convenience is worth it</b>!...\" Read more</p></div></div></div>Sorry we couldn\\'t load the review</div><span><div><div><div>Thank you for your feedback</div><button>Close</button></div></div></span><span><div><div><div>Sorry, there was an error</div><button>Close</button></div></div></span></div><h3>Reviews with images</h3><button><span>All photos</span></button></div></div></div>\\n View Image Gallery\\n</div></div></div><span><div><div><div><h3>\\n Top reviews from the United States\\n</h3></div></div></div></span><span><div><div><div><div><div><div><h4>There was a problem filtering reviews. Please reload the page.</h4></div></div></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div>Anne P. Mitchell</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><h5>5.0 out of 5 stars\\nI LOVE My Instant Pot! 
But Here\\'s What I Wish I\\'d Known when I First Got It\\n</h5></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><span>Reviewed in the United States on April 16, 2016</span></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span>Size: 6 Quarts</span>Verified Purchase</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span><div><div><span><br/></span></div></div></span></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span><div><div>Read more</div></div></span></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span>\\n<div><span>30,251 people found this helpful</span></div>\\n<div>\\n<span><span>\\n Helpful\\n</span></span></div>\\n</span><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div>Aundrea</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><h5>5.0 out of 5 stars\\nThis has changed the way we eat. It\\'s easier to use than I thought it would be.\\n</h5></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><span>Reviewed in the United States on August 18, 2016</span></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><div><div><div>\\n<div>\\nAundrea\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n This has changed the way we eat. 
It\\'s easier to use than I thought it would be.\\n </h5>\\n<span>\\n Reviewed in the United States on August 18, 2016\\n </span>\\n</div></div></div></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><div><div><span><br/></span></div></div></div></div></div></span></li></ul></div></div></div></div></span></div></div></div></div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div><div><div><div><div><div><div><div><div><span><div><div><div><div><ul><li><span><div><div><div><div><div><div>\\n<h6>\\n Images in this review\\n </h6>\\n</div></div></div><span>\\n<div><span>5,558 people found this helpful</span></div>\\n<div>\\n<span><span>\\n Helpful\\n</span></span></div>\\n</span>\\n</div></div></div></span></li></ul>See more reviews</div></div></div></div><span><div>\\n<h3>\\n Top reviews from other countries\\n </h3>\\n<div>\\n<div><span><span><span>Translate all reviews to English</span></span></span>\\n</div>\\n</div>\\n</div></span><span><div><div><ul><li><span>\\n<div><div>\\n<div><div><div><span>Alheny</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>Excelente</span>\\n</span></h5></div><span>Reviewed in Mexico on June 4, 2025</span><div><span>Size: 6 Quarts</span>Verified Purchase</div><div><span>\\n<div><div>\\n<span>Excelente producto, la recomiendo totalmente, facilita el trabajo en la cocina</span>\\n</div><div>Read more</div></div></span></div><div><span>\\n<span><span>Report</span></span></span>\\n<div><span>Translate review to English</span></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><span>NeuroEmergent</span></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>A truly Canadian innovation - Instant Pot is the best item 
in my kitchen, hands down</span>\\n</span></h5></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><span>Reviewed in Canada on November 23, 2017</span></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span>Size: 6 Quarts</span>Verified Purchase</div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span><div><div>Read more</div></div></span></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><div>\\n<div>\\n<div><div><span>NeuroEmergent</span></div></div>\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n A truly Canadian innovation - Instant Pot is the best item in my kitchen, hands down\\n </h5>\\n<span>\\n Reviewed in Canada on November 23, 2017\\n </span>\\n</div></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><span><br/></span></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><div>\\n<h6>\\n Images in this review\\n </h6>\\n</div></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div>\\n<div><div><div><span>MV</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>3 Qt Instant Pot. LOVE IT!!!</span>\\n</span></h5></div><span>Reviewed in Canada on December 25, 2024</span><div><span>Size: 3 Quarts</span>Verified Purchase</div><div><span><div><div>\\n<span>My main cooking appliance. Uses only 675 watts max to build pressure, then mostly 0 watts under pressure but occasionally spiking back to 675 watts to keep the pressure. 
3 Qts is a great size for 1 or 2 people, or even more depending on what you are cooking. Takes some practice, reading the manual and recipe guide and trial and error to tweak preferred times. It will even boil a pot of water like a kettle, which I tried as a test but forgot to time it. Fantastic appliance for off grid energy efficiency and used far more than the induction hot plate. So far nothing it hasn't cooked. Also extremely safe with the On Off and delay timers and turns off if it were to boil dry, unlike a gas stove which could burn down your house. Can't tell you how I know that. Fantastic for seniors for safety if they can get over all the preset buttons which are not needed anyway and just learn to use the few buttons and functions required to cook almost anything. Highly Recommended.</span>\\n</div><div>Read more</div></div></span></div><div><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></span></span></div></div></div></div></div></div></div></div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><span><span><div><div><ul><li><span>\\n<div><div>\\n<div><div><div><span>Laissan sayab perez</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>Gran inversión para la cocina</span>\\n</span></h5></div><span>Reviewed in Mexico on March 30, 2025</span><div><span>Size: 3 Quarts</span>Verified Purchase</div><div><span><div><div>\\n<span>Gran inversión para la cocina, soy una persona muy ocupada y me gusta cuidar de mi salud me cocino, pero en los guisos y cocciones de frijoles se consume mucho gas , opté por esta olla que vi, ya hice mi primer caldo de res con verduras quedó la carne muy suave en poco tiempo ⏱️ me encantó, tiene muy buena seguridad para la presión.Lo que me encanta:✔️ Cocina mucho más rápido que una olla convencional.✔️ Tiene varias funciones, desde cocción a presión hasta salteado.✔️ Es segura y 
fácil de limpiar.Lo que podría mejorar:🔹 La curva de aprendizaje puede ser un poco alta al inicio, pero una vez que entiendes los tiempos y funciones, todo es sencillo.En general, es una excelente compra si quieres ahorrar tiempo en la cocina y hacer recetas deliciosas sin complicaciones. ¡La recomiendo totalmente!</span>\\n</div><div>Read more</div></div></span></div><div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div><div><span>Laissan </span></div></div>\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n Gran inversión para la cocina\\n </h5>\\n<span>\\n Reviewed in Mexico on March 30, 2025\\n </span>\\n</div>\\n<span>\\n Gran inversión para la cocina, soy una persona muy ocupada y me gusta cuidar de mi salud me cocino, pero en los guisos y cocciones de frijoles se consume mucho gas , opté por esta olla que vi, ya hice mi primer caldo de res con verduras quedó la carne muy suave en poco tiempo ⏱️ me encantó, tiene muy buena seguridad para la presión.<br/>Lo que me encanta:✔️ Cocina mucho más rápido que una olla convencional.✔️ Tiene varias funciones, desde cocción a presión hasta salteado.✔️ Es segura y fácil de limpiar.Lo que podría mejorar:🔹 La curva de aprendizaje puede ser un poco alta al inicio, pero una vez que entiendes los tiempos y funciones, todo es sencillo.En general, es una excelente compra si quieres ahorrar tiempo en la cocina y hacer recetas deliciosas sin complicaciones. 
¡La recomiendo totalmente!\\n</span>\\n<div>\\n<h6>\\n Images in this review\\n </h6>\\n</div>\\n</div>\\n</div>\\n</div>\\n<div><span>\\n<span><span>Report</span></span></span>\\n<div><span>Translate review to English</span></div></div></div></div></span></li><div>See more reviews</div></ul></div></div></span></span></div></div></div></div></div></div></div>\\nGet to Know Us</div></div></div> <div> <div> <div> Your recently viewed items and featured recommendations </div> <div> <div> <div> <div> › </div> <div> View or edit your browsing history </div> </div> <span> After viewing product detail pages, look here to find an easy way to navigate back to pages you are interested in. </span> </div> </div> </div> </div></div></div></body></html>']\n",
294
- "Using Hugging Face reranker for classification.\n",
295
- "Scores for passages: [0.6365740895271301, 0.42525428533554077, 0.16477522253990173, 0.33392345905303955, 0.2433975487947464, 0.27956345677375793, 0.3777231276035309, 0.49405714869499207, 0.6539114713668823, 0.5250579714775085, 0.3064478635787964]\n",
296
- "top indices: [8, 0, 9]\n",
297
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
298
- "Final output: {'productTitle': 'Instant Pot Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer & Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart', 'price': 'N/A', 'manufacturer': 'Instant Pot'}\n",
299
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
300
  ]
301
  },
302
  {
303
  "data": {
304
  "text/plain": [
305
- "{'productTitle': 'Instant Pot Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer & Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart',\n",
306
- " 'price': 'N/A',\n",
307
- " 'manufacturer': 'Instant Pot'}"
308
  ]
309
  },
310
- "execution_count": 15,
311
  "metadata": {},
312
  "output_type": "execute_result"
313
  }
314
  ],
315
  "source": [
316
- "pipe.run(content=url,is_url=True, schema=schema, hf=True)"
317
  ]
318
  },
319
  {
@@ -341,7 +498,7 @@
341
  "name": "python",
342
  "nbconvert_exporter": "python",
343
  "pygments_lexer": "ipython3",
344
- "version": "3.11.8"
345
  }
346
  },
347
  "nbformat": 4,
 
5
  "execution_count": 1,
6
  "id": "5223b1b7",
7
  "metadata": {},
8
+ "outputs": [],
 
 
 
 
 
 
 
 
 
9
  "source": [
10
  "from web2json.preprocessor import *\n",
11
  "from web2json.ai_extractor import *\n",
 
40
  "execution_count": 3,
41
  "id": "9e6b0eb9",
42
  "metadata": {},
43
+ "outputs": [],
 
 
 
 
 
 
 
 
 
44
  "source": [
45
  "llm = NvidiaLLMClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'qwen/qwen2.5-7b-instruct'})\n",
46
+ "reranker = NvidiaRerankerClient(config={'api_key': os.getenv('NVIDIA_API_KEY'),'model_name': 'nv-rerank-qa-mistral-4b:1'})\n",
47
+ "# reranker = HFRerankerClient()"
48
  ]
49
  },
50
  {
 
153
  "post = PostProcessor()"
154
  ]
155
  },
156
+ {
157
+ "cell_type": "code",
158
+ "execution_count": 8,
159
+ "id": "c4e75e63",
160
+ "metadata": {},
161
+ "outputs": [],
162
+ "source": [
163
+ "html_chunks = [\n",
164
+ " \"\"\"\n",
165
+ " <div class=\"product-card\">\n",
166
+ " <h2 class=\"product-title\">Wireless Noise Cancelling Headphones</h2>\n",
167
+ " <p class=\"product-description\">Experience immersive sound with active noise cancellation and long battery life.</p>\n",
168
+ " <span class=\"price\">$299.99</span>\n",
169
+ " <button>Add to Cart</button>\n",
170
+ " </div>\n",
171
+ " \"\"\",\n",
172
+ "\n",
173
+ " \"\"\"\n",
174
+ " <section class=\"blog-post\">\n",
175
+ " <h1>Top 5 AI Tools to Try in 2025</h1>\n",
176
+ " <p>Artificial intelligence continues to evolve. Here are five tools you should explore in 2025:</p>\n",
177
+ " <ul>\n",
178
+ " <li>LangChain</li>\n",
179
+ " <li>AutoGen</li>\n",
180
+ " <li>OpenDevin</li>\n",
181
+ " <li>FastRAG</li>\n",
182
+ " <li>GPTScript</li>\n",
183
+ " </ul>\n",
184
+ " <footer>Published by <strong>TechToday</strong> on June 30, 2025</footer>\n",
185
+ " </section>\n",
186
+ " \"\"\",\n",
187
+ "\n",
188
+ " \"\"\"\n",
189
+ " <section class=\"blog-post\">\n",
190
+ " <h1>Top 5 AI Tools to Try in 2025</h1>\n",
191
+ " <p>Artificial intelligence continues to evolve. Here are five tools you should explore in 2025:</p>\n",
192
+ " <ul>\n",
193
+ " <li>LangChain</li>\n",
194
+ " <li>AutoGen</li>\n",
195
+ " <li>OpenDevin</li>\n",
196
+ " <li>FastRAG</li>\n",
197
+ " <li>GPTScript</li>\n",
198
+ " </ul>\n",
199
+ " <footer>Published by <strong>TechToday</strong> on June 30, 2025</footer>\n",
200
+ " </section>\n",
201
+ " \"\"\",\n",
202
+ "\n",
203
+ " \"\"\"\n",
204
+ " <div class=\"review\">\n",
205
+ " <h3>User Review: Amazing Performance!</h3>\n",
206
+ " <p>I’ve been using this laptop for a few months and it’s blazing fast. Great for deep learning workloads!</p>\n",
207
+ " <div class=\"rating\">Rating: ⭐⭐⭐⭐⭐</div>\n",
208
+ " <span class=\"user\">– Sarah M.</span>\n",
209
+ " </div>\n",
210
+ " \"\"\"\n",
211
+ "]\n"
212
+ ]
213
+ },
214
+ {
215
+ "cell_type": "code",
216
+ "execution_count": 9,
217
+ "id": "bb4edecf",
218
+ "metadata": {},
219
+ "outputs": [
220
+ {
221
+ "data": {
222
+ "text/plain": [
223
+ "4"
224
+ ]
225
+ },
226
+ "execution_count": 9,
227
+ "metadata": {},
228
+ "output_type": "execute_result"
229
+ }
230
+ ],
231
+ "source": [
232
+ "len(html_chunks)"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": null,
238
+ "id": "9927a78e",
239
+ "metadata": {},
240
+ "outputs": [],
241
+ "source": [
242
+ "output = reranker.rerank(query=classification_prompt_template,passages=html_chunks)"
243
+ ]
244
+ },
245
+ {
246
+ "cell_type": "code",
247
+ "execution_count": 14,
248
+ "id": "b77015f3",
249
+ "metadata": {},
250
+ "outputs": [
251
+ {
252
+ "name": "stdout",
253
+ "output_type": "stream",
254
+ "text": [
255
+ "page_content='\n",
256
+ " <div class=\"product-card\">\n",
257
+ " <h2 class=\"product-title\">Wireless Noise Cancelling Headphones</h2>\n",
258
+ " <p class=\"product-description\">Experience immersive sound with active noise cancellation and long battery life.</p>\n",
259
+ " <span class=\"price\">$299.99</span>\n",
260
+ " <button>Add to Cart</button>\n",
261
+ " </div>\n",
262
+ " ' metadata={'relevance_score': -11.25, 'softmax_score': 0.9546922134634852, 'minmax_score': 1.0}\n",
263
+ "--------------------------------------------------------------------------------\n",
264
+ "page_content='\n",
265
+ " <section class=\"blog-post\">\n",
266
+ " <h1>Top 5 AI Tools to Try in 2025</h1>\n",
267
+ " <p>Artificial intelligence continues to evolve. Here are five tools you should explore in 2025:</p>\n",
268
+ " <ul>\n",
269
+ " <li>LangChain</li>\n",
270
+ " <li>AutoGen</li>\n",
271
+ " <li>OpenDevin</li>\n",
272
+ " <li>FastRAG</li>\n",
273
+ " <li>GPTScript</li>\n",
274
+ " </ul>\n",
275
+ " <footer>Published by <strong>TechToday</strong> on June 30, 2025</footer>\n",
276
+ " </section>\n",
277
+ " ' metadata={'relevance_score': -15.2265625, 'softmax_score': 0.017900461577508887, 'minmax_score': 0.00888037271767236}\n",
278
+ "--------------------------------------------------------------------------------\n",
279
+ "page_content='\n",
280
+ " <section class=\"blog-post\">\n",
281
+ " <h1>Top 5 AI Tools to Try in 2025</h1>\n",
282
+ " <p>Artificial intelligence continues to evolve. Here are five tools you should explore in 2025:</p>\n",
283
+ " <ul>\n",
284
+ " <li>LangChain</li>\n",
285
+ " <li>AutoGen</li>\n",
286
+ " <li>OpenDevin</li>\n",
287
+ " <li>FastRAG</li>\n",
288
+ " <li>GPTScript</li>\n",
289
+ " </ul>\n",
290
+ " <footer>Published by <strong>TechToday</strong> on June 30, 2025</footer>\n",
291
+ " </section>\n",
292
+ " ' metadata={'relevance_score': -15.2265625, 'softmax_score': 0.017900461577508887, 'minmax_score': 0.00888037271767236}\n",
293
+ "--------------------------------------------------------------------------------\n",
294
+ "page_content='\n",
295
+ " <div class=\"review\">\n",
296
+ " <h3>User Review: Amazing Performance!</h3>\n",
297
+ " <p>I’ve been using this laptop for a few months and it’s blazing fast. Great for deep learning workloads!</p>\n",
298
+ " <div class=\"rating\">Rating: ⭐⭐⭐⭐⭐</div>\n",
299
+ " <span class=\"user\">– Sarah M.</span>\n",
300
+ " </div>\n",
301
+ " ' metadata={'relevance_score': -15.859375, 'softmax_score': 0.009506863381497203, 'minmax_score': 0.0}\n",
302
+ "--------------------------------------------------------------------------------\n"
303
+ ]
304
+ }
305
+ ],
306
+ "source": [
307
+ "for o in output:\n",
308
+ " print(o)\n",
309
+ " print('-'*80)"
310
+ ]
311
+ },
312
+ {
313
+ "cell_type": "code",
314
+ "execution_count": 42,
315
+ "id": "bb3fa1b0",
316
+ "metadata": {},
317
+ "outputs": [
318
+ {
319
+ "data": {
320
+ "text/plain": [
321
+ "4"
322
+ ]
323
+ },
324
+ "execution_count": 42,
325
+ "metadata": {},
326
+ "output_type": "execute_result"
327
+ }
328
+ ],
329
+ "source": [
330
+ "len(output)"
331
+ ]
332
+ },
333
  {
334
  "cell_type": "code",
335
  "execution_count": 8,
 
411
  },
412
  {
413
  "cell_type": "code",
414
+ "execution_count": 16,
415
  "id": "f07e1aca",
416
  "metadata": {},
417
  "outputs": [],
 
439
  },
440
  {
441
  "cell_type": "code",
442
+ "execution_count": 21,
443
  "id": "79cf2321",
444
  "metadata": {},
445
  "outputs": [
 
448
  "output_type": "stream",
449
  "text": [
450
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
451
+ "2\n",
452
+ "Content successfully chunked into 9.\n",
453
+ "Content successfully chunked: [\"<html><head>\\n<link/>\\n<link/>\\n<link/>\\n<meta/><title>Amazon.com: Instant Pot Duo 7-in-1 Electric Pressure Cooker, Slow Cooker, Rice Cooker, Steamer, Sauté, Yogurt Maker, Warmer &amp; Sterilizer, Includes App With Over 800 Recipes, Stainless Steel, 6 Quart</title>\\n</head><body><div><nav>\\n<h2>Skip to</h2>\\n<ul>\\n<li>\\n Main content\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n About this item\\n</li>\\n<li>\\n Buying options\\n</li>\\n<li>\\n Compare with similar items\\n</li>\\n<li>\\n Videos\\n</li>\\n<li>\\n Reviews\\n</li>\\n</ul>\\n<h2>\\n Keyboard shortcuts\\n </h2>\\n<ul>\\n<li>\\nSearch\\nalt\\n+\\n/\\n</li>\\n<li>\\nCart\\nshift\\n+\\nalt\\n+\\nC\\n</li>\\n<li>\\nHome\\nshift\\n+\\nalt\\n+\\nH\\n</li>\\n<li>\\nOrders\\nshift\\n+\\nalt\\n+\\nO\\n</li>\\n<li>\\n<button>\\n<div>\\n<span>Add to cart</span>\\n<div>\\n<span>shift</span>\\n<span>+</span>\\n<span>alt</span>\\n<span>+</span>\\n<span>K</span>\\n</div>\\n</div>\\n</button>\\n</li>\\n<li>\\n<button>\\n<div>\\n<span>Show/Hide shortcuts</span>\\n<div>\\n<span>shift</span>\\n<span>+</span>\\n<span>alt</span>\\n<span>+</span>\\n<span>Z</span>\\n</div>\\n</div>\\n</button>\\n</li>\\n</ul>\\n<div>\\n<div>\\n<div>\\n<div>\\n<span>To move between items, use your keyboard's up or down arrows.</span>\\n</div>\\n</div>\\n</div>\\n</div>\\n</nav></div></body><body><div><div><div><div>\\n<div>\\n<div><div><div><ul><li><span>Home &amp; Kitchen</span></li><li><span>›</span></li><li><span>Kitchen &amp; Dining</span></li><li><span>›</span></li><li><span>Small Appliances</span></li><li><span>›</span></li><li><span>Rice Cookers</span></li></ul></div></div></div> </div>\\n</div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div> <div> <h5> <div>\\n<div> <span> Deal Price Regular Price </span> </div> </div>\\n</h5> <div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div> 
<div>\\n<div>\\n<span><span>$79.99</span><span><span>$</span><span>79<span>.</span></span><span>99</span></span></span> </div>\\n</div> </div> </div>\\n</div>\\n</div>\\n</div>\\n<div>\\n<div> <div> <div> <span> Ships from: </span> <span> Amazon.com </span> </div> </div> <div> <div> <span> Sold by: </span> <span> Amazon.com </span> </div> </div> </div> </div>\\n</div>\\n<div><form><input/></form></div><div><form><div><div><div>\\n<div>\\n<span> $235.34 Shipping &amp; Import Fees Deposit to Egypt </span> <span> Details </span> <div> <h3>Shipping &amp; Fee Details</h3>\\n<table> <tr> <td> <span> Price </span> </td> <td> <span> $99.95 </span> </td> </tr> <tr> <td> <span> AmazonGlobal Shipping </span> </td> <td> <span> $81.05 </span> </td> </tr> <tr> <td> <span> \\n Estimated Import Fees Deposit\\n</span> </td> <td> <span> $154.29 </span> </td> </tr> <tr> <td> <span>Total</span> </td> <td> <span> $335.29 </span> </td> </tr> </table> </div>\\n</div>\\n<div>\\n<div>\\n<div><div><div><span> Delivery <span>Sunday, July 13</span>. Order within <span>23 hrs 59 mins</span> </span></div></div></div> </div>\\n<div>\\n<span> \\nDeliver to\\xa0Egypt\\n </span> </div>\\n</div>\\n</div></div></div></form></div><div><form><div><div><div>\\n<div> <div> <span> In Stock </span> </div> </div> </div></div></div></form></div></div> <div>\\n<div>\\n<div> <div> <span> This deal is exclusively for Amazon Prime members. </span> </div>\\n<div> <div>\\n<span><span><input/><span> Join Prime </span></span></span> </div>\\n<div> <span>Cancel anytime</span> </div> </div> <div> <span> Already a member? 
</span> Sign in </div> </div> </div>\\n<div>\\n<div> <div> <div> <div>\\n<div>\\n<div>\\n<div>\\n<span>Ships from</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Ships from</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Sold by</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Sold by</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Returns</span> </div>\\n</div>\\n<div>\\n<span> 30-day refund/replacement </span> <div> <div> <div> <span>30-day refund/replacement</span> </div> <div> This item can be returned in its original condition for a full refund or replacement within 30 days of receipt. </div> <div> Read full return policy </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Payment</span> </div>\\n</div>\\n<div>\\n<span> Secure transaction </span> <div> <div> <div> <span>Your transaction is secure</span> </div> <div> We work hard to protect your security and privacy. Our payment security system encrypts your information during transmission. We don’t share your credit card details with third-party sellers, and we don’t sell your information to others. Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Support</span> </div>\\n</div>\\n<div>\\n<span> Product support included </span> <div> <div> <div> <span>What\\'s Product Support?</span> </div> <div> In the event your product doesn\\'t work as expected or you need help using it, Amazon offers free product support options such as live phone/chat with an Amazon associate, manufacturer contact information, step-by-step troubleshooting guides, and help videos. \\nBy solving product issues, we help the planet by extending the life of products. Availability of support options differ by product and country. 
Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Packaging</span> </div>\\n</div>\\n<div>\\n<span> Ships in product packaging </span> <div> <div> <div> <span>Ships in product packaging</span> </div> <div> <p>This item has been tested to certify it can ship safely in its original box or bag to avoid unnecessary packaging. Since 2015, we have reduced the weight of outbound packaging per shipment by 41% on average, that’s over 2 million tons of packaging material.</p><i>If you still require Amazon packaging for this item, choose \"Ship in Amazon packaging\" at checkout. </i> Learn more </div> </div> </div> </div> </div>\\n</div>\\n</div> <div> <div>See more</div> </div> </div> </div> </div>\\n</div> </div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\\n<div>\\n<div> <span> <div> <div>\\n<span><label>Quantity:</label><select> <option>1 </option> <option>2 </option> <option>3 </option> <option>4 </option> <option>5 </option> <option>6 </option> <option>7 </option> <option>8 </option> <option>9 </option> <option>10 </option> <option>11 </option> <option>12 </option> <option>13 </option> <option>14 </option> <option>15 </option> <option>16 </option> <option>17 </option> <option>18 </option> <option>19 </option> <option>20 </option> <option>21 </option> <option>22 </option> <option>23 </option> <option>24 </option> <option>25 </option> <option>26 </option> <option>27 </option> <option>28 </option> <option>29 </option> <option>30 </option> </select><span><span><span><span>Quantity:</span><span>1</span></span></span></span></span> </div> </div> <span><input/><span> Buy Now </span></span></span> <div><div> <span> Enhancements you chose aren't available for this seller. 
</span> <span> Details </span> <div> <div> <div> <div> <span> To add the following enhancements to your purchase, choose a different seller. </span> </div> <div> <span>%cardName%</span> </div> </div> </div> </div> </div></div></div> <span> <span><span><input/><span>Add to Cart</span></span></span> </span></div> <input/><div> <div> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> <span> \\n ()\\n </span> <span> Includes selected options. </span> <span> Includes initial monthly payment and selected options. </span> <span> <span> <span>\\n Details </span>\\n</span> <div> <div> <div> <div> <div><div> <div> <span>Price</span> <span> <span> <span> (</span><span>$</span><span>79<span>.</span></span><span>99</span><span>x)</span> </span> </span> </div> <div> <span> <span> <span>$</span><span>79<span>.</span></span><span>99</span> </span> </span> </div> </div></div> </div> <div> <div><div> <div> <span>Subtotal</span> </div> <div> <span> <span>$</span><span><span><span>$79.99</span><span><span>79<span>.</span></span><span>99</span></span></span></span> </span> </div> </div></div> <div><div> <div> <span>Subtotal</span> </div> </div></div> <div> <div> <span>Initial payment breakdown</span> </div> </div> <div> <span>Shipping cost, delivery date, and order total (including tax) shown at checkout.</span>\\n</div> </div> </div> </div> </div> </span> </div> </div></div></div></div></form></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><form><div><div><div>\\n<div> <div> <div> <div>\\n<div>\\n<div>\\n<div>\\n<span>Ships from</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Ships from</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Sold 
by</span> </div>\\n</div>\\n<div>\\n<div>\\n<span>Amazon.com</span> </div>\\n<span> Amazon.com </span> <div> <div> <div> <span>Sold by</span> </div> <div> Amazon.com </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Returns</span> </div>\\n</div>\\n<div>\\n<span> 30-day refund/replacement </span> <div> <div> <div> <span>30-day refund/replacement</span> </div> <div> This item can be returned in its original condition for a full refund or replacement within 30 days of receipt. </div> <div> Read full return policy </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<span>Packaging</span> </div>\\n</div>\\n<div>\\n<span> Ships in product packaging </span> <div> <div> <div> <span>Ships in product packaging</span> </div> <div> <p>This item has been tested to certify it can ship safely in its original box or bag to avoid unnecessary packaging. Since 2015, we have reduced the weight of outbound packaging per shipment by 41% on average, that’s over 2 million tons of packaging material.</p><i>If you still require Amazon packaging for this item, choose \"Ship in Amazon packaging\" at checkout. </i> Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Payment</span> </div>\\n</div>\\n<div>\\n<span> Secure transaction </span> <div> <div> <div> <span>Your transaction is secure</span> </div> <div> We work hard to protect your security and privacy. Our payment security system encrypts your information during transmission. We don’t share your credit card details with third-party sellers, and we don’t sell your information to others. 
Learn more </div> </div> </div> </div> </div>\\n<div>\\n<div>\\n<div>\\n<span>Support</span> </div>\\n</div>\\n<div>\\n<span> Product support included </span> <div> <div> <div> <span>What\\'s Product Support?</span> </div> <div> In the event your product doesn\\'t work as expected or you need help using it, Amazon offers free product support options such as live phone/chat with an Amazon associate, manufacturer contact information, step-by-step troubleshooting guides, and help videos. \\nBy solving product issues, we help the planet by extending the life of products. Availability of support options differ by product and country. Learn more </div> </div> </div> </div> </div>\\n</div>\\n</div>\\n</div>\\n</div> <div> <div>See more</div> </div> </div> <label><input/><span>Add a gift receipt for easy returns</span></label></div> </div></div></div></form></div><span>Instant Pot Duo</span></div></div></div>\\n</div>\\n</div></div> \\xa0 Report an issue with this product or seller<h4>Product voltage: 120</h4></div> <span> <span>8K+ bought</span><span> in past month</span> </span>Brief content visible, double tap to read full content.</div> Visit the Instant Pot Store \\n <ul> <li><span> 7-IN-1 FUNCTIONALITY: Pressure cook, slow cook, rice cooker, yogurt maker, steamer, sauté pan and food warmer. </span></li> <li><span> QUICK ONE-TOUCH COOKING: 13 customizable Smart Programs for pressure cooking ribs, soups, beans, rice, poultry, yogurt, desserts and more. </span></li> <li><span> COOK FAST OR SLOW: Pressure cook delicious one-pot meals up to 70% faster than traditional cooking methods or slow cook your favorite traditional recipes – just like grandma used to make. </span></li> <li><span> QUICK AND EASY CLEAN UP: Finger-print resistant, stainless-steel sides and dishwasher-safe lid, inner pot, and accessories. 
</span></li> <li><span> SAFETY FEATURES: Includes over 10 safety features, plus overheat protection and safe-locking lid </span></li> <li><span> GREAT FOR GROWING FAMILIES: Cook for up to 6 people – perfect for growing families, or meal prepping and batch cooking for singles. </span></li> <li><span> VERSATILE INNER COOKING POT: We use food-grade stainless-steel, a tri-ply bottom for more even cooking and perfect for sautéing </span></li> <li><span> DISCOVER AMAZING RECIPES: Includes the free Instant Brands Connect App, where you can find new recipes to create quick favorites and prepare delicious meals, available for iOS and Android. </span></li> </ul><div> <span>›</span> See more product details </div> <div><div> <div> Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, &amp; Warmer, Includes App With Over 800 Recipes, 6 Quart <span>$94.95</span> (2,374) <span>In Stock</span> </div> </div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div>\\n<div>\\n<div><div> <div><input/> <div><div><h2>Deals on related products</h2> <div> <div> <span> Sponsored </span> </div> </div> </div><div><span><span>Page <span>1</span> of <span>1</span></span><span>Start over</span></span></div></div> <div> <div><div><div>Previous page of related Sponsored Products</div><div><div><ol> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CHEF iQ Smart Pressure Cooker with WiFi and Built-in Scale - Easy-to-Use 10-in-1 Mu... </div> <div> 2,645 </div> With Prime <div> -30%$139.98$139.98List Price:$199.99$199.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Nuwave Duet Air Fryer, Electric Pressure Cooker &amp; Grill Combo, 540 IN 1 Multicooker... 
</div> <div> 378 </div> With Prime <div> -10%$146.10$146.10Typical price:$162.33$162.33 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Hamilton Beach 3-in-1 Electric Egg Cooker for Hard Boiled Eggs, Poacher Eggs, Omele... </div> <div> 5,210 </div> <div> Amazon's\\xa0Choice </div> Ends in <div> -19%$16.98$16.98List:$20.95$20.95 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> CUCKOO CRP-ST1009FW 10-Cup (Uncooked) / 20-Cup (Cooked) Twin Pressure Rice Cooker &amp;... </div> <div> 366 </div> <div> Amazon's\\xa0Choice </div> With Prime <div> -31%$239.99$239.99List Price:$349.99$349.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> WantJoin Pressure Cooker, 8 Quart Stainless Steel Pressure Canner, Induction Compat... </div> <div> 947 </div> <div> Amazon's\\xa0Choice </div> Limited time deal <div> -10%$80.89$80.89List:$89.99$89.99 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Buffalo Classic Rice Cooker with Clad Stainless Steel Inner Pot - Electric Rice Coo... </div> <div> 479 </div> Ends in <div> -15%$126.65$126.65List Price$149.00$149.00 </div> </div> </li> <li> <div> <div> <div> <span> Feedback </span> </div> </div> <div> Pizza Oven Indoor, Countertop Electric Pizza Maker 12-inch, 2-minute Pizza, 6 Prese... 
</div> <div> 12 </div> Limited time deal <div> -15%$169.99$169.99List:$199.99$199.99 </div> </div> </li> </ol></div></div><div>Next page of related Sponsored Products</div></div></div> </div> <span>\\n<div>\\n<div><div><div><div><h2>Customer reviews</h2></div><div><div><div><i><span>4.6 out of 5 stars</span></i></div><div><div><span><span>4.6 out of 5</span></span></div></div></div></div><div><span>130,204 global ratings</span></div><div><div><div><ul><li><span>5 star4 star3 star2 star1 star5 star83%10%3%1%3%83%</span></li><li><span>5 star4 star3 star2 star1 star4 star83%10%3%1%3%10%</span></li><li><span>5 star4 star3 star2 star1 star3 star83%10%3%1%3%3%</span></li><li><span>5 star4 star3 star2 star1 star2 star83%10%3%1%3%1%</span></li><li><span>5 star4 star3 star2 star1 star1 star83%10%3%1%3%3%</span></li></ul></div></div></div><div><div><div><div>How customer reviews and ratings work<div><p>Customer Reviews, including Product Star Ratings help customers to learn more about the product and decide whether it is the right product for them.</p><p>To calculate the overall star rating and percentage breakdown by star, we don’t use a simple average. Instead, our system considers things like how recent a review is and if the reviewer bought the item on Amazon. 
It also analyzed reviews to verify trustworthiness.</p>Learn more how customers reviews work on Amazon</div></div></div></div></div></div></div></div>\\n</div></span><div><h3>Review this product</h3><div>Share your thoughts with other customers</div><div><span><span>Write a customer review</span></span></div></div></div> </div></div>\\n</div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><h3>Customers say</h3><div><div><div><div><h4>Select to learn more</h4></div><div>Works wellEase of useCook timeAppliance qualityCooking abilityEase of cleaningFlavorBuild quality</div></div></div></div><div><div><div><div><div><div><div><span>8,040 customers mention \"Works well\"</span><span>6,940 positive</span><span>1,100 negative</span></div></div></div><div><div><p>Customers find that the pressure cooker works well, with the sauté feature performing particularly effectively.</p></div></div><div><div><p>\"...This <b>works with new potatoes</b>, and regular potatoes! Happy Instant Potting!\" Read more</p></div></div><div><div><p>\"...<b>It was excellent</b>. I did 6 minutes per pound + 2 minutes. 
I also cook chicken thighs for dinner about once a week, which I had never cooked before....\" Read more</p></div></div><div><div><p>\"...Most <b>programs work just fine on full automatic</b>, but some small exceptions may demand more online flexibility....\" Read more</p></div></div><div><div><p>\"...occasional mishaps, the Instant Pot Duo has consistently <b>delivered incredible results</b>....\" Read more</p></div></div></div></div></div></div><div><div><div><div><div><div><div><span>7,651 customers mention \"Ease of use\"</span><span>6,651 positive</span><span>1,000 negative</span></div></div></div><div><div><p>Customers find the pressure cooker simple to use, with clear operating instructions in the booklet, making meal preparation a breeze.</p></div></div><div><div><p>\"...make in your Instant Pot that will change your life: <b>incredibly easy perfectly poached eggs</b> in 2-3 minutes, and baked potatoes in 12 minutes....\" Read more</p></div></div><div><div><p>\"...credit as most automatic settings work well, automating it for <b>ease of use</b> and safety. 
Cooking is part Science, but, I think, more Art than Science....\" Read more</p></div></div><div><div><p>\"...crockpot extensively over the past years and while I appreciate the <b>ease of use</b> and the ability to put a meal on the table soon after I got home in...\" Read more</p></div></div><div><div><p>\"...of pressure cookers anymore, the time , energy bills saved n <b>convenience is worth it</b>!...\" Read more</p></div></div></div></div></div></div><div><div><div><div><div><div><div><span>6,666 customers mention \"Cook time\"</span><span>6,260 positive</span><span>406 negative</span></div></div></div><div><div><p>Customers appreciate the pressure cooker\\'s quick cooking time, with one mentioning it can make rice in just 10 minutes, while another notes it cooks like a crockpot in 1/8th the time.</p></div></div><div><div><p>\"...incredibly easy perfectly poached eggs in 2-3 minutes, and <b>baked potatoes in 12 minutes</b>....\" Read more</p></div></div><div><div><p>\"...My kids love it. <b>8 minutes on manual with a natural release</b>. I just stir it with a fork and don\\'t even need to blend it....\" Read more</p></div></div><div><div><p>\"...steel liner (looks like chrome), along with the <b>delay and cooking timer auto-shutoff</b>. 
This sets it apart from old-time swisher type 1st Gen P.C.\\'s....\" Read more</p></div></div><div><div><p>\"...versatile appliance seamlessly transforms into a pressure cooker, <b>slow cooker</b>, rice cooker, steamer, sauté pan, yogurt maker, warmer, and even a...\" Read more</p></div></div></div></div></div></div><div><div><div><div><div><div><div><span>5,399 customers mention \"Appliance quality\"</span><span>5,399 positive</span><span>0 negative</span></div></div></div><div><div><p>Customers find the pressure cooker to be a fabulous kitchen appliance, with one customer noting its versatility as both a pressure cooker and crockpot.</p></div></div><div><div><p>\"...When you are ready for your potatoes, they will be <b>perfectly done</b> and waiting for you, even if you have abandoned them for hours!...\" Read more</p></div></div><div><div><p>\"...I have to use a rapid boil just to make tea. A <b>pressure cooker is the great equalizer</b>, a must at higher altitudes because 15 lbs is 15 lbs pressure...\" Read more</p></div></div><div><div><p>\"...This <b>versatile appliance seamlessly transforms into a pressure cooker</b>, slow cooker, rice cooker, steamer, sauté pan, yogurt maker, warmer, and even...\" Read more</p></div></div><div><div><p>\"...It\\'s just better insulated, but I\\'ve found that <b>meals are so good under pressure</b> that there\\'s no need to use the slow cooker function....\" Read more</p></div></div></div></div></div></div></div><p><span>Customers find the pressure cooker works well, particularly praising its sauté feature and accurate cooking times. They appreciate its ease of use, with one customer noting the intuitive controls, and consider it a great kitchen appliance that makes meal prep convenient. The appliance receives positive feedback for its cooking ability, with one customer highlighting its versatility in transforming into a pressure cooker, and customers find it easy to clean with a stainless steel pot that cleans well. 
Customers enjoy the complex flavors produced, though opinions on build quality are mixed, with some finding it well-made while others describe it as wimpy.</span></p><p><span>AI Generated from the text of customer reviews</span></p></div></div></div></div></div></div></div></div></div></div></div></div></div></div></div></body></html>', '<html><body><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><div><span>4,396 customers mention \"Cooking ability\"3,065 customers mention \"Ease of cleaning\"2,827 customers mention \"Flavor\"2,273 customers mention \"Build quality\"</span><span>4,298 positive</span><span>98 negative</span>Amazon Customer<i><span>5 out of 5 stars</span></i>MoreHide</div><h5>This has changed the way we eat. It\\'s easier to use than I thought it would be.</h5></div></div><div><div><p>Customers praise the pressure cooker\\'s cooking ability, particularly its amazing recipes and rice cooking feature, with one customer noting it makes stir-fry dishes and another mentioning it\\'s easy to use on the dining room table.</p></div></div><div><div><p>\"...there in the morning, leave for the day, and come back to a <b>perfectly cooked whatever</b>, just waiting for you! 
Booyah!...\" Read more</p></div></div><div><div><p>\"...You could <b>very easily cook on the dining room table</b>, or a small adjacent table....\" Read more</p></div></div><div><div><p>\"...While the <b>free app provided great recipes</b> and guidance, a comprehensive manual would have been helpful for understanding all the features and...\" Read more</p></div></div><div><div><p>\"...This handy appliance has <b>transformed my summertime cooking</b>, allowing me to break away from our usual salads and grilled chicken rut....\" Read more</p></div></div></div>Sorry we couldn\\'t load the review</div><span><div><div><div>Thank you for your feedback</div><button>Close</button></div></div></span><span><div><div><div>Sorry, there was an error</div><button>Close</button></div></div></span></div><button><span>All photos</span></button></div></div><input/><div><div><div>Previous page</div><div>Next page</div></div></div></div>\\n View Image Gallery\\n</div></div></div><span><div><div><div><h3>\\n Top reviews from the United States\\n</h3></div></div></div></span><span><div><div><div><div><div><div><h4>There was a problem filtering reviews. Please reload the page.</h4></div></div></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div>Anne P. Mitchell</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><h5>5.0 out of 5 stars\\nI LOVE My Instant Pot! 
But Here\\'s What I Wish I\\'d Known when I First Got It\\n</h5></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><span>Reviewed in the United States on April 16, 2016</span></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span>Size: 6 Quarts</span>Verified Purchase</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span><div><div><span><br/></span></div></div></span></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span><div><div>Read more</div></div></span></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><span>\\n<div><span>30,253 people found this helpful</span></div>\\n<div>\\n<span><span>\\n Helpful\\n</span></span></div>\\n</span><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div>Aundrea</div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><h5>5.0 out of 5 stars\\nThis has changed the way we eat. It\\'s easier to use than I thought it would be.\\n</h5></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><span>Reviewed in the United States on August 18, 2016</span></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><div><div><div>\\n<div>\\nAundrea\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n This has changed the way we eat. 
It\\'s easier to use than I thought it would be.\\n </h5>\\n<span>\\n Reviewed in the United States on August 18, 2016\\n </span>\\n</div></div></div></div></div></div></span></li></ul></div></div></div></div></span><span><div><div><div><div><ul><li><span><div><div><div><div><div><span><br/></span></div></div></div></div></div></span></li></ul></div></div></div></div></span></div></div></div></div></div></div></div></div></div></div></div></body></html>', \"<html><body><div><div><div><div><div><div><div><div><div><div><div><span><div><div><div><div><ul><li><span><div><div><div><div><div><div>\\n<h6>\\n Images in this review\\n </h6>\\n</div></div></div><span>\\n<div><span>5,558 people found this helpful</span></div>\\n<div>\\n<span><span>\\n Helpful\\n</span></span></div>\\n</span>\\n</div></div></div></span></li></ul>See more reviews</div></div></div></div><span><div>\\n<h3>\\n Top reviews from other countries\\n </h3>\\n<div>\\n<div><span><span><span>Translate all reviews to English</span></span></span>\\n</div>\\n</div>\\n</div></span><span><div><div><ul><li><span>\\n<div><div>\\n<div><div><div><span>Alheny</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>Excelente</span>\\n</span></h5></div><span>Reviewed in Mexico on June 4, 2025</span><div><span>Size: 6 Quarts</span>Verified Purchase</div><div><span>\\n<div><div>\\n<span>Excelente producto, la recomiendo totalmente, facilita el trabajo en la cocina</span>\\n</div><div>Read more</div></div></span></div><div><span>\\n<span><span>Report</span></span></span>\\n<div><span>Translate review to English</span></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><span>NeuroEmergent</span></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>A truly Canadian innovation - Instant Pot is the best item in my 
kitchen, hands down</span>\\n</span></h5></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><span>Reviewed in Canada on November 23, 2017</span></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span>Size: 6 Quarts</span>Verified Purchase</div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span><div><div>Read more</div></div></span></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><div>\\n<div>\\n<div><div><span>NeuroEmergent</span></div></div>\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n A truly Canadian innovation - Instant Pot is the best item in my kitchen, hands down\\n </h5>\\n<span>\\n Reviewed in Canada on November 23, 2017\\n </span>\\n</div></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><span><br/></span></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><div><div><div>\\n<h6>\\n Images in this review\\n </h6>\\n</div></div></div></div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div><div><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></span><span><div><div><ul><li><span><div><div>\\n<div><div><div><span>MV</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>3 Qt Instant Pot. LOVE IT!!!</span>\\n</span></h5></div><span>Reviewed in Canada on December 25, 2024</span><div><span>Size: 3 Quarts</span>Verified Purchase</div><div><span><div><div>\\n<span>My main cooking appliance. Uses only 675 watts max to build pressure, then mostly 0 watts under pressure but occasionally spiking back to 675 watts to keep the pressure. 
3 Qts is a great size for 1 or 2 people, or even more depending on what you are cooking. Takes some practice, reading the manual and recipe guide and trial and error to tweak preferred times. It will even boil a pot of water like a kettle, which I tried as a test but forgot to time it. Fantastic appliance for off grid energy efficiency and used far more than the induction hot plate. So far nothing it hasn't cooked. Also extremely safe with the On Off and delay timers and turns off if it were to boil dry, unlike a gas stove which could burn down your house. Can't tell you how I know that. Fantastic for seniors for safety if they can get over all the preset buttons which are not needed anyway and just learn to use the few buttons and functions required to cook almost anything. Highly Recommended.</span>\\n</div><div>Read more</div></div></span></div><div><span>\\n<span><span>Report</span></span></span>\\n</div></div></div></span></li></ul></div></div></span></span></div></div></div></div></div></div></div></div></div></div></div></body></html>\", '<html><body><div><div><div><div><div><div><div><div><div><div><div><span><span><div><div><ul><li><span>\\n<div><div>\\n<div><div><div><span>Laissan sayab perez</span></div></div></div><div><h5><i><span>5.0 out of 5 stars</span></i><span>\\n<span>Gran inversión para la cocina</span>\\n</span></h5></div><span>Reviewed in Mexico on March 30, 2025</span><div><span>Size: 3 Quarts</span>Verified Purchase</div><div><span><div><div>\\n<span>Gran inversión para la cocina, soy una persona muy ocupada y me gusta cuidar de mi salud me cocino, pero en los guisos y cocciones de frijoles se consume mucho gas , opté por esta olla que vi, ya hice mi primer caldo de res con verduras quedó la carne muy suave en poco tiempo ⏱️ me encantó, tiene muy buena seguridad para la presión.Lo que me encanta:✔️ Cocina mucho más rápido que una olla convencional.✔️ Tiene varias funciones, desde cocción a presión hasta salteado.✔️ Es segura y fácil de 
limpiar.Lo que podría mejorar:🔹 La curva de aprendizaje puede ser un poco alta al inicio, pero una vez que entiendes los tiempos y funciones, todo es sencillo.En general, es una excelente compra si quieres ahorrar tiempo en la cocina y hacer recetas deliciosas sin complicaciones. ¡La recomiendo totalmente!</span>\\n</div><div>Read more</div></div></span></div><div>\\n<div>\\n<div>\\n<div>\\n<div>\\n<div><div><span>Laissan </span></div></div>\\n</div>\\n<i><span>5.0 out of 5 stars</span></i>\\n<h5>\\n Gran inversión para la cocina\\n </h5>\\n<span>\\n Reviewed in Mexico on March 30, 2025\\n </span>\\n</div>\\n<span>\\n Gran inversión para la cocina, soy una persona muy ocupada y me gusta cuidar de mi salud me cocino, pero en los guisos y cocciones de frijoles se consume mucho gas , opté por esta olla que vi, ya hice mi primer caldo de res con verduras quedó la carne muy suave en poco tiempo ⏱️ me encantó, tiene muy buena seguridad para la presión.<br/>Lo que me encanta:✔️ Cocina mucho más rápido que una olla convencional.✔️ Tiene varias funciones, desde cocción a presión hasta salteado.✔️ Es segura y fácil de limpiar.Lo que podría mejorar:🔹 La curva de aprendizaje puede ser un poco alta al inicio, pero una vez que entiendes los tiempos y funciones, todo es sencillo.En general, es una excelente compra si quieres ahorrar tiempo en la cocina y hacer recetas deliciosas sin complicaciones. 
¡La recomiendo totalmente!\\n</span>\\n<div>\\n<h6>\\n Images in this review\\n </h6>\\n</div>\\n</div>\\n</div>\\n</div>\\n<div><span>\\n<span><span>Report</span></span></span>\\n<div><span>Translate review to English</span></div></div></div></div></span></li><div>See more reviews</div></ul></div></div></span></span></div></div></div></div></div></div>Get to Know Us</div></div></div> <div> <div> <div> Your recently viewed items and featured recommendations </div> <div> <div> <div> <div> › </div> <div> View or edit your browsing history </div> </div> <span> After viewing product detail pages, look here to find an easy way to navigate back to pages you are interested in. </span> </div> </div> </div> </div></div></div></body></html>']\n",
 
 
454
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n",
455
+ "Final output: {'productTitle': 'Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart', 'price': 79.99, 'manufacturer': 'Amazon.com'}\n",
456
  "++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++\n"
457
  ]
458
  },
459
  {
460
  "data": {
461
  "text/plain": [
462
+ "{'productTitle': 'Instant Pot RIO, 7-in-1 Electric Multi-Cooker, PressureCooker, SlowCooker, RiceCooker, Steamer, Sauté, Yogurt Maker, & Warmer, Includes App With Over 800 Recipes, 6 Quart',\n",
463
+ " 'price': 79.99,\n",
464
+ " 'manufacturer': 'Amazon.com'}"
465
  ]
466
  },
467
+ "execution_count": 21,
468
  "metadata": {},
469
  "output_type": "execute_result"
470
  }
471
  ],
472
  "source": [
473
+ "pipe.run(content=url,is_url=True, schema=schema, hf=False)"
474
  ]
475
  },
476
  {
 
498
  "name": "python",
499
  "nbconvert_exporter": "python",
500
  "pygments_lexer": "ipython3",
501
+ "version": "3.11.9"
502
  }
503
  },
504
  "nbformat": 4,
web2json/__pycache__/ai_extractor.cpython-311.pyc CHANGED
Binary files a/web2json/__pycache__/ai_extractor.cpython-311.pyc and b/web2json/__pycache__/ai_extractor.cpython-311.pyc differ
 
web2json/__pycache__/pipeline.cpython-311.pyc CHANGED
Binary files a/web2json/__pycache__/pipeline.cpython-311.pyc and b/web2json/__pycache__/pipeline.cpython-311.pyc differ
 
web2json/__pycache__/postprocessor.cpython-311.pyc CHANGED
Binary files a/web2json/__pycache__/postprocessor.cpython-311.pyc and b/web2json/__pycache__/postprocessor.cpython-311.pyc differ
 
web2json/__pycache__/preprocessor.cpython-311.pyc CHANGED
Binary files a/web2json/__pycache__/preprocessor.cpython-311.pyc and b/web2json/__pycache__/preprocessor.cpython-311.pyc differ
 
web2json/ai_extractor.py CHANGED
@@ -20,6 +20,9 @@ import json
20
  from langchain_text_splitters import HTMLHeaderTextSplitter
21
  from sentence_transformers import SentenceTransformer
22
  import requests
 
 
 
23
 
24
  class LLMClient(ABC):
25
  """
@@ -47,6 +50,34 @@ class LLMClient(ABC):
47
  """
48
  pass
49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
 
51
  class GeminiLLMClient(LLMClient):
52
  """
@@ -233,7 +264,7 @@ class NvidiaLLMClient(LLMClient):
233
  return results
234
 
235
 
236
- class NvidiaRerankerClient(LLMClient):
237
  """
238
  Concrete implementation of LLMClient for the NVIDIA API (non-streaming).
239
  """
@@ -255,18 +286,55 @@ class NvidiaRerankerClient(LLMClient):
255
  self.model_name = model_name
256
 
257
  @retry_on_ratelimit(max_retries=6, base_delay=0.5, max_delay=5.0)
258
- def call_api(self, prompt: str) -> str:
259
- pass
260
-
261
- def call_batch(self, prompts, max_workers=8):
262
- pass
263
-
264
- from transformers import AutoTokenizer, AutoModelForSequenceClassification
265
- import torch
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
 
267
- import torch
268
- from transformers import AutoTokenizer, AutoModelForCausalLM
269
- from typing import List, Dict
 
270
 
271
 
272
  class HFRerankerClient(LLMClient):
@@ -306,7 +374,7 @@ class HFRerankerClient(LLMClient):
306
  score = torch.softmax(logits, dim=1)[0, 1].item() # probability of relevance
307
  scores.append(score)
308
 
309
- print(f"Scores for passages: {scores}")
310
 
311
  top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
312
  print(f"top indices: {top_indices}")
@@ -355,7 +423,7 @@ class LLMClassifierExtractor(AIExtractor):
355
  Extractor that uses an LLM to classify and extract structured information from text content.
356
  This class is designed to handle classification tasks where the LLM generates structured output based on a provided schema.
357
  """
358
- def __init__(self, reranker: LLMClient, llm_client: LLMClient, prompt_template: str, classifier_prompt: str, ):
359
  """
360
  Initializes the LLMClassifierExtractor with an LLM client and a prompt template.
361
 
@@ -382,18 +450,16 @@ class LLMClassifierExtractor(AIExtractor):
382
 
383
 
384
  def classify_chunks(self, passages, top_k=3, hf: bool = False): # reranker
 
385
  query = self.classifier_prompt
386
 
387
  if hf:
388
- print("Using Hugging Face reranker for classification.")
389
  return self.reranker.rerank(query, passages, top_k=top_k)
390
-
 
391
  # NVIDIA reranker path
392
- responses = self.reranker.client.compress_documents(
393
- query=query,
394
- documents=[Document(page_content=passage) for passage in passages]
395
- )
396
- return [response.page_content for response in responses[:top_k]]
397
 
398
  def extract(self, content, schema, hf: bool = False):
399
  """
@@ -404,10 +470,16 @@ class LLMClassifierExtractor(AIExtractor):
404
  schema (BaseModel): A Pydantic model defining the structure of the expected output.
405
  hf (bool): Whether to use the Hugging Face reranker or NVIDIA (default).
406
  """
407
- chunks = self.chunk_content(content, max_tokens=1500)
408
- print(f"Content successfully chunked into {len(chunks)}.")
409
- print(f"Content successfully chunked: {chunks}")
 
410
  classified_chunks = self.classify_chunks(chunks, hf=hf) # conditional reranker
 
 
 
 
 
411
  filtered_content = "\n\n".join(classified_chunks)
412
 
413
  if not filtered_content:
 
20
  from langchain_text_splitters import HTMLHeaderTextSplitter
21
  from sentence_transformers import SentenceTransformer
22
  import requests
23
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
24
+ import torch
25
+ from typing import List, Dict
26
 
27
  class LLMClient(ABC):
28
  """
 
50
  """
51
  pass
52
 
53
class RerankerClient(ABC):
    """
    Common interface that every reranker backend has to implement.
    """

    def __init__(self, config: dict = None):
        """
        Store the configuration for this reranker client.

        Args:
            config (dict): Configuration settings for the reranker client.
                A missing or empty value is replaced by a new empty dict.
        """
        self.config = config if config else {}

    @abstractmethod
    def rerank(self, query: str, passages: List[str], top_k: int = 3) -> List[str]:
        """
        Order passages by their relevance to a query and keep the best ones.

        Args:
            query (str): Query string the passages are scored against.
            passages (List[str]): Candidate passages.
            top_k (int): Number of top passages to return.

        Returns:
            List[str]: The top-k most relevant passages.
        """
        ...
80
+
81
 
82
  class GeminiLLMClient(LLMClient):
83
  """
 
264
  return results
265
 
266
 
267
+ class NvidiaRerankerClient(RerankerClient):
268
  """
269
  Concrete implementation of RerankerClient for the NVIDIA API (non-streaming).
270
  """
 
286
  self.model_name = model_name
287
 
288
  @retry_on_ratelimit(max_retries=6, base_delay=0.5, max_delay=5.0)
289
def rerank(self, query: str, passages: List[str], top_k: int = 3, threshold: float = 0.5) -> List[Document]:
    """
    Score passages against a query with the NVIDIA reranker and keep the best ones.

    Each returned document keeps the reranker's own ``relevance_score`` in its
    metadata and gains two derived values:

    * ``softmax_score`` - softmax over the raw relevance scores of this call.
    * ``minmax_score`` - raw relevance scores rescaled linearly to [0, 1]
      (all set to 1 when every score is identical).

    Args:
        query (str): Query string; it is passed through ``str()`` before sending.
        passages (List[str]): Candidate passages to score.
        top_k (int): Maximum number of documents to return. ``None`` disables
            the cap.
        threshold (float): Minimum ``minmax_score`` a document needs to be kept.

    Returns:
        List[Document]: Documents whose ``minmax_score`` is at least
        ``threshold``, ordered by descending ``softmax_score`` and limited to
        ``top_k`` entries. Empty when there is nothing to score.

    NOTE(review): the return type is a list of Document objects, not the
    ``List[str]`` declared on RerankerClient.rerank; callers read
    ``.page_content`` from the results.
    """
    # Nothing to score: avoid a pointless API call and the ValueError that
    # numpy reductions raise on empty arrays.
    if not passages:
        return []

    # 1. Prepare and send documents for scoring
    docs = [Document(page_content=p) for p in passages]
    scored_docs = self.client.compress_documents(
        query=str(query),
        documents=docs
    )
    if not scored_docs:
        return []

    # 2. Extract raw scores
    raw_scores = np.array([doc.metadata['relevance_score'] for doc in scored_docs], dtype=float)
    min_val, max_val = raw_scores.min(), raw_scores.max()

    # 3. Softmax normalization (shifted by the max for numerical stability)
    exp_scores = np.exp(raw_scores - max_val)
    softmax_scores = exp_scores / exp_scores.sum()

    # 4. Min-max rescale of the raw scores
    if max_val > min_val:
        minmax_scores = (raw_scores - min_val) / (max_val - min_val)
    else:
        # all scores equal -> set them all to 1
        minmax_scores = np.ones_like(raw_scores)

    # 5. Attach new scores back to metadata
    for doc, s, mm in zip(scored_docs, softmax_scores, minmax_scores):
        doc.metadata['softmax_score'] = float(s)
        doc.metadata['minmax_score'] = float(mm)

    # 6. Sort by softmax_score descending, drop weak matches, cap at top_k
    sorted_docs = sorted(
        scored_docs,
        key=lambda d: d.metadata['softmax_score'],
        reverse=True
    )
    filtered_docs = [doc for doc in sorted_docs if doc.metadata['minmax_score'] >= threshold]
    return filtered_docs[:top_k]
333
 
334
+
335
+ # TODO: will I need it ?
336
+ # def call_batch(self, prompts, max_workers=8):
337
+ # pass
338
 
339
 
340
  class HFRerankerClient(LLMClient):
 
374
  score = torch.softmax(logits, dim=1)[0, 1].item() # probability of relevance
375
  scores.append(score)
376
 
377
+ # print(f"Scores for passages: {scores}")
378
 
379
  top_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)[:top_k]
380
  print(f"top indices: {top_indices}")
 
423
  Extractor that uses an LLM to classify and extract structured information from text content.
424
  This class is designed to handle classification tasks where the LLM generates structured output based on a provided schema.
425
  """
426
+ def __init__(self, reranker: RerankerClient, llm_client: LLMClient, prompt_template: str, classifier_prompt: str, ):
427
  """
428
  Initializes the LLMClassifierExtractor with an LLM client and a prompt template.
429
 
 
450
 
451
 
452
  def classify_chunks(self, passages, top_k=3, hf: bool = False): # reranker
453
+ # print("TIME TO CLASSIFY")
454
  query = self.classifier_prompt
455
 
456
  if hf:
457
+ # print("Using Hugging Face reranker for classification.")
458
  return self.reranker.rerank(query, passages, top_k=top_k)
459
+ response = self.reranker.rerank(query,passages)
460
+ # print("DONNNNE")
461
  # NVIDIA reranker path
462
+ return response
 
 
 
 
463
 
464
  def extract(self, content, schema, hf: bool = False):
465
  """
 
470
  schema (BaseModel): A Pydantic model defining the structure of the expected output.
471
  hf (bool): Whether to use the Hugging Face reranker or NVIDIA (default).
472
  """
473
+ # print("TIME TO EXTRACT")
474
+ chunks = self.chunk_content(content, max_tokens=1000)
475
+ # print(f"Content successfully chunked into {len(chunks)}.")
476
+ # print(f"Content successfully chunked: {chunks}")
477
  classified_chunks = self.classify_chunks(chunks, hf=hf) # conditional reranker
478
+ # extracting the content
479
+ classified_chunks = [chunk.page_content for chunk in classified_chunks]
480
+ # print(f"Classified Chunks {len(classified_chunks)}")
481
+ # print(classified_chunks)
482
+ # print('='*80)
483
  filtered_content = "\n\n".join(classified_chunks)
484
 
485
  if not filtered_content: