File size: 45,175 Bytes
1c8d1ca |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 |
var e,t={exports:{}};var i,a,r,n={exports:{}};var s=function(){if(r)return a;r=1;var s=(e||(e=1,function(e){function t(e,t){if(t&&t.documentElement)e=t,t=arguments[2];else if(!e||!e.documentElement)throw new Error("First argument to Readability constructor should be a document object.");if(t=t||{},this._doc=e,this._docJSDOMParser=this._doc.firstChild.__JSDOMParser__,this._articleTitle=null,this._articleByline=null,this._articleDir=null,this._articleSiteName=null,this._attempts=[],this._debug=!!t.debug,this._maxElemsToParse=t.maxElemsToParse||this.DEFAULT_MAX_ELEMS_TO_PARSE,this._nbTopCandidates=t.nbTopCandidates||this.DEFAULT_N_TOP_CANDIDATES,this._charThreshold=t.charThreshold||this.DEFAULT_CHAR_THRESHOLD,this._classesToPreserve=this.CLASSES_TO_PRESERVE.concat(t.classesToPreserve||[]),this._keepClasses=!!t.keepClasses,this._serializer=t.serializer||function(e){return e.innerHTML},this._disableJSONLD=!!t.disableJSONLD,this._allowedVideoRegex=t.allowedVideoRegex||this.REGEXPS.videos,this._flags=this.FLAG_STRIP_UNLIKELYS|this.FLAG_WEIGHT_CLASSES|this.FLAG_CLEAN_CONDITIONALLY,this._debug){let e=function(e){if(e.nodeType==e.TEXT_NODE)return`${e.nodeName} ("${e.textContent}")`;let t=Array.from(e.attributes||[],(function(e){return`${e.name}="${e.value}"`})).join(" ");return`<${e.localName} ${t}>`};this.log=function(){if("undefined"!=typeof console)Array.from(arguments,(t=>t&&t.nodeType==this.ELEMENT_NODE?e(t):t)).unshift("Reader: (Readability)");else if("undefined"!=typeof dump){var t=Array.prototype.map.call(arguments,(function(t){return t&&t.nodeName?e(t):t})).join(" ");dump("Reader: (Readability) "+t+"\n")}}}else this.log=function(){}}t.prototype={FLAG_STRIP_UNLIKELYS:1,FLAG_WEIGHT_CLASSES:2,FLAG_CLEAN_CONDITIONALLY:4,
// https://developer.mozilla.org/en-US/docs/Web/API/Node/nodeType
ELEMENT_NODE:1,TEXT_NODE:3,
// Max number of nodes supported by this parser. Default: 0 (no limit)
DEFAULT_MAX_ELEMS_TO_PARSE:0,
// The number of top candidates to consider when analysing how
// tight the competition is among candidates.
DEFAULT_N_TOP_CANDIDATES:5,
// Element tags to score by default.
DEFAULT_TAGS_TO_SCORE:"section,h2,h3,h4,h5,h6,p,td,pre".toUpperCase().split(","),
// The default number of chars an article must have in order to return a result
DEFAULT_CHAR_THRESHOLD:500,
// All of the regular expressions in use within readability.
// Defined up here so we don't instantiate them repeatedly in loops.
REGEXPS:{
// NOTE: These two regular expressions are duplicated in
// Readability-readerable.js. Please keep both copies in sync.
unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i,positive:/article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story/i,negative:/-ad-|hidden|^hid$| hid$| hid |^hid |banner|combx|comment|com-|contact|foot|footer|footnote|gdpr|masthead|media|meta|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget/i,extraneous:/print|archive|comment|discuss|e[\-]?mail|share|reply|all|login|sign|single|utility/i,byline:/byline|author|dateline|writtenby|p-author/i,replaceFonts:/<(\/?)font[^>]*>/gi,normalize:/\s{2,}/g,videos:/\/\/(www\.)?((dailymotion|youtube|youtube-nocookie|player\.vimeo|v\.qq)\.com|(archive|upload\.wikimedia)\.org|player\.twitch\.tv)/i,shareElements:/(\b|_)(share|sharedaddy)(\b|_)/i,nextLink:/(next|weiter|continue|>([^\|]|$)|»([^\|]|$))/i,prevLink:/(prev|earl|old|new|<|«)/i,tokenize:/\W+/g,whitespace:/^\s*$/,hasContent:/\S$/,hashUrl:/^#.+/,srcsetUrl:/(\S+)(\s+[\d.]+[xw])?(\s*(?:,|$))/g,b64DataUrl:/^data:\s*([^\s;,]+)\s*;\s*base64\s*,/i,
// Commas as used in Latin, Sindhi, Chinese and various other scripts.
// see: https://en.wikipedia.org/wiki/Comma#Comma_variants
commas:/\u002C|\u060C|\uFE50|\uFE10|\uFE11|\u2E41|\u2E34|\u2E32|\uFF0C/g,
// See: https://schema.org/Article
jsonLdArticleTypes:/^Article|AdvertiserContentArticle|NewsArticle|AnalysisNewsArticle|AskPublicNewsArticle|BackgroundNewsArticle|OpinionNewsArticle|ReportageNewsArticle|ReviewNewsArticle|Report|SatiricalArticle|ScholarlyArticle|MedicalScholarlyArticle|SocialMediaPosting|BlogPosting|LiveBlogPosting|DiscussionForumPosting|TechArticle|APIReference$/},UNLIKELY_ROLES:["menu","menubar","complementary","navigation","alert","alertdialog","dialog"],DIV_TO_P_ELEMS:new Set(["BLOCKQUOTE","DL","DIV","IMG","OL","P","PRE","TABLE","UL"]),ALTER_TO_DIV_EXCEPTIONS:["DIV","ARTICLE","SECTION","P"],PRESENTATIONAL_ATTRIBUTES:["align","background","bgcolor","border","cellpadding","cellspacing","frame","hspace","rules","style","valign","vspace"],DEPRECATED_SIZE_ATTRIBUTE_ELEMS:["TABLE","TH","TD","HR","PRE"],
// The commented out elements qualify as phrasing content but tend to be
// removed by readability when put into paragraphs, so we ignore them here.
PHRASING_ELEMS:[
// "CANVAS", "IFRAME", "SVG", "VIDEO",
"ABBR","AUDIO","B","BDO","BR","BUTTON","CITE","CODE","DATA","DATALIST","DFN","EM","EMBED","I","IMG","INPUT","KBD","LABEL","MARK","MATH","METER","NOSCRIPT","OBJECT","OUTPUT","PROGRESS","Q","RUBY","SAMP","SCRIPT","SELECT","SMALL","SPAN","STRONG","SUB","SUP","TEXTAREA","TIME","VAR","WBR"],
// These are the classes that readability sets itself.
CLASSES_TO_PRESERVE:["page"],
// These are the list of HTML entities that need to be escaped.
HTML_ESCAPE_MAP:{lt:"<",gt:">",amp:"&",quot:'"',apos:"'"},
/**
* Run any post-process modifications to article content as necessary.
*
* @param Element
* @return void
**/
_postProcessContent:function(e){this._fixRelativeUris(e),this._simplifyNestedElements(e),this._keepClasses||this._cleanClasses(e)},
/**
* Iterates over a NodeList, calls `filterFn` for each node and removes node
* if function returned `true`.
*
* If function is not passed, removes all the nodes in node list.
*
* @param NodeList nodeList The nodes to operate on
* @param Function filterFn the function to use as a filter
* @return void
*/
_removeNodes:function(e,t){if(this._docJSDOMParser&&e._isLiveNodeList)throw new Error("Do not pass live node lists to _removeNodes");for(var i=e.length-1;i>=0;i--){var a=e[i],r=a.parentNode;r&&(t&&!t.call(this,a,i,e)||r.removeChild(a))}},
/**
* Iterates over a NodeList, and calls _setNodeTag for each node.
*
* @param NodeList nodeList The nodes to operate on
* @param String newTagName the new tag name to use
* @return void
*/
_replaceNodeTags:function(e,t){if(this._docJSDOMParser&&e._isLiveNodeList)throw new Error("Do not pass live node lists to _replaceNodeTags");for(const i of e)this._setNodeTag(i,t)},
/**
* Iterate over a NodeList, which doesn't natively fully implement the Array
* interface.
*
* For convenience, the current object context is applied to the provided
* iterate function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
* @return void
*/
_forEachNode:function(e,t){Array.prototype.forEach.call(e,t,this)},
/**
* Iterate over a NodeList, and return the first node that passes
* the supplied test function
*
* For convenience, the current object context is applied to the provided
* test function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The test function.
* @return void
*/
_findNode:function(e,t){return Array.prototype.find.call(e,t,this)},
/**
* Iterate over a NodeList, return true if any of the provided iterate
* function calls returns true, false otherwise.
*
* For convenience, the current object context is applied to the
* provided iterate function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
* @return Boolean
*/
_someNode:function(e,t){return Array.prototype.some.call(e,t,this)},
/**
* Iterate over a NodeList, return true if all of the provided iterate
* function calls return true, false otherwise.
*
* For convenience, the current object context is applied to the
* provided iterate function.
*
* @param NodeList nodeList The NodeList.
* @param Function fn The iterate function.
* @return Boolean
*/
_everyNode:function(e,t){return Array.prototype.every.call(e,t,this)},
/**
* Concat all nodelists passed as arguments.
*
* @return ...NodeList
* @return Array
*/
_concatNodeLists:function(){var e=Array.prototype.slice,t=e.call(arguments).map((function(t){return e.call(t)}));return Array.prototype.concat.apply([],t)},_getAllNodesWithTag:function(e,t){return e.querySelectorAll?e.querySelectorAll(t.join(",")):[].concat.apply([],t.map((function(t){var i=e.getElementsByTagName(t);return Array.isArray(i)?i:Array.from(i)})))},
/**
* Removes the class="" attribute from every element in the given
* subtree, except those that match CLASSES_TO_PRESERVE and
* the classesToPreserve array from the options object.
*
* @param Element
* @return void
*/
_cleanClasses:function(e){var t=this._classesToPreserve,i=(e.getAttribute("class")||"").split(/\s+/).filter((function(e){return-1!=t.indexOf(e)})).join(" ");for(i?e.setAttribute("class",i):e.removeAttribute("class"),e=e.firstElementChild;e;e=e.nextElementSibling)this._cleanClasses(e)},
/**
* Converts each <a> and <img> uri in the given element to an absolute URI,
* ignoring #ref URIs.
*
* @param Element
* @return void
*/
_fixRelativeUris:function(e){var t=this._doc.baseURI,i=this._doc.documentURI;function a(e){if(t==i&&"#"==e.charAt(0))return e;try{return new URL(e,t).href}catch(a){}return e}var r=this._getAllNodesWithTag(e,["a"]);this._forEachNode(r,(function(e){var t=e.getAttribute("href");if(t)if(0===t.indexOf("javascript:"))if(1===e.childNodes.length&&e.childNodes[0].nodeType===this.TEXT_NODE){var i=this._doc.createTextNode(e.textContent);e.parentNode.replaceChild(i,e)}else{for(var r=this._doc.createElement("span");e.firstChild;)r.appendChild(e.firstChild);e.parentNode.replaceChild(r,e)}else e.setAttribute("href",a(t))}));var n=this._getAllNodesWithTag(e,["img","picture","figure","video","audio","source"]);this._forEachNode(n,(function(e){var t=e.getAttribute("src"),i=e.getAttribute("poster"),r=e.getAttribute("srcset");if(t&&e.setAttribute("src",a(t)),i&&e.setAttribute("poster",a(i)),r){var n=r.replace(this.REGEXPS.srcsetUrl,(function(e,t,i,r){return a(t)+(i||"")+r}));e.setAttribute("srcset",n)}}))},_simplifyNestedElements:function(e){for(var t=e;t;){if(t.parentNode&&["DIV","SECTION"].includes(t.tagName)&&(!t.id||!t.id.startsWith("readability"))){if(this._isElementWithoutContent(t)){t=this._removeAndGetNext(t);continue}if(this._hasSingleTagInsideElement(t,"DIV")||this._hasSingleTagInsideElement(t,"SECTION")){for(var i=t.children[0],a=0;a<t.attributes.length;a++)i.setAttribute(t.attributes[a].name,t.attributes[a].value);t.parentNode.replaceChild(i,t),t=i;continue}}t=this._getNextNode(t)}},
/**
* Get the article title as an H1.
*
* @return string
**/
_getArticleTitle:function(){var e=this._doc,t="",i="";try{"string"!=typeof(t=i=e.title.trim())&&(t=i=this._getInnerText(e.getElementsByTagName("title")[0]))}catch(h){}var a=!1;function r(e){return e.split(/\s+/).length}if(/ [\|\-\\\/>»] /.test(t))a=/ [\\\/>»] /.test(t),r(t=i.replace(/(.*)[\|\-\\\/>»] .*/gi,"$1"))<3&&(t=i.replace(/[^\|\-\\\/>»]*[\|\-\\\/>»](.*)/gi,"$1"));else if(-1!==t.indexOf(": ")){var n=this._concatNodeLists(e.getElementsByTagName("h1"),e.getElementsByTagName("h2")),s=t.trim();this._someNode(n,(function(e){return e.textContent.trim()===s}))||(r(t=i.substring(i.lastIndexOf(":")+1))<3?t=i.substring(i.indexOf(":")+1):r(i.substr(0,i.indexOf(":")))>5&&(t=i))}else if(t.length>150||t.length<15){var l=e.getElementsByTagName("h1");1===l.length&&(t=this._getInnerText(l[0]))}var o=r(t=t.trim().replace(this.REGEXPS.normalize," "));return o<=4&&(!a||o!=r(i.replace(/[\|\-\\\/>»]+/g,""))-1)&&(t=i),t},
/**
* Prepare the HTML document for readability to scrape it.
* This includes things like stripping javascript, CSS, and handling terrible markup.
*
* @return void
**/
_prepDocument:function(){var e=this._doc;this._removeNodes(this._getAllNodesWithTag(e,["style"])),e.body&&this._replaceBrs(e.body),this._replaceNodeTags(this._getAllNodesWithTag(e,["font"]),"SPAN")},
/**
* Finds the next node, starting from the given node, and ignoring
* whitespace in between. If the given node is an element, the same node is
* returned.
*/
_nextNode:function(e){for(var t=e;t&&t.nodeType!=this.ELEMENT_NODE&&this.REGEXPS.whitespace.test(t.textContent);)t=t.nextSibling;return t},
/**
* Replaces 2 or more successive <br> elements with a single <p>.
* Whitespace between <br> elements are ignored. For example:
* <div>foo<br>bar<br> <br><br>abc</div>
* will become:
* <div>foo<br>bar<p>abc</p></div>
*/
_replaceBrs:function(e){this._forEachNode(this._getAllNodesWithTag(e,["br"]),(function(e){for(var t=e.nextSibling,i=!1;(t=this._nextNode(t))&&"BR"==t.tagName;){i=!0;var a=t.nextSibling;t.parentNode.removeChild(t),t=a}if(i){var r=this._doc.createElement("p");for(e.parentNode.replaceChild(r,e),t=r.nextSibling;t;){if("BR"==t.tagName){var n=this._nextNode(t.nextSibling);if(n&&"BR"==n.tagName)break}if(!this._isPhrasingContent(t))break;var s=t.nextSibling;r.appendChild(t),t=s}for(;r.lastChild&&this._isWhitespace(r.lastChild);)r.removeChild(r.lastChild);"P"===r.parentNode.tagName&&this._setNodeTag(r.parentNode,"DIV")}}))},_setNodeTag:function(e,t){if(this.log("_setNodeTag",e,t),this._docJSDOMParser)return e.localName=t.toLowerCase(),e.tagName=t.toUpperCase(),e;for(var i=e.ownerDocument.createElement(t);e.firstChild;)i.appendChild(e.firstChild);e.parentNode.replaceChild(i,e),e.readability&&(i.readability=e.readability);for(var a=0;a<e.attributes.length;a++)try{i.setAttribute(e.attributes[a].name,e.attributes[a].value)}catch(r){}return i},
/**
* Prepare the article node for display. Clean out any inline styles,
* iframes, forms, strip extraneous <p> tags, etc.
*
* @param Element
* @return void
**/
_prepArticle:function(e){this._cleanStyles(e),this._markDataTables(e),this._fixLazyImages(e),this._cleanConditionally(e,"form"),this._cleanConditionally(e,"fieldset"),this._clean(e,"object"),this._clean(e,"embed"),this._clean(e,"footer"),this._clean(e,"link"),this._clean(e,"aside");var t=this.DEFAULT_CHAR_THRESHOLD;this._forEachNode(e.children,(function(e){this._cleanMatchedNodes(e,(function(e,i){return this.REGEXPS.shareElements.test(i)&&e.textContent.length<t}))})),this._clean(e,"iframe"),this._clean(e,"input"),this._clean(e,"textarea"),this._clean(e,"select"),this._clean(e,"button"),this._cleanHeaders(e),this._cleanConditionally(e,"table"),this._cleanConditionally(e,"ul"),this._cleanConditionally(e,"div"),this._replaceNodeTags(this._getAllNodesWithTag(e,["h1"]),"h2"),this._removeNodes(this._getAllNodesWithTag(e,["p"]),(function(e){return 0===e.getElementsByTagName("img").length+e.getElementsByTagName("embed").length+e.getElementsByTagName("object").length+e.getElementsByTagName("iframe").length&&!this._getInnerText(e,!1)})),this._forEachNode(this._getAllNodesWithTag(e,["br"]),(function(e){var t=this._nextNode(e.nextSibling);t&&"P"==t.tagName&&e.parentNode.removeChild(e)})),this._forEachNode(this._getAllNodesWithTag(e,["table"]),(function(e){var t=this._hasSingleTagInsideElement(e,"TBODY")?e.firstElementChild:e;if(this._hasSingleTagInsideElement(t,"TR")){var i=t.firstElementChild;if(this._hasSingleTagInsideElement(i,"TD")){var a=i.firstElementChild;a=this._setNodeTag(a,this._everyNode(a.childNodes,this._isPhrasingContent)?"P":"DIV"),e.parentNode.replaceChild(a,e)}}}))},
/**
* Initialize a node with the readability object. Also checks the
* className/id for special names to add to its score.
*
* @param Element
* @return void
**/
_initializeNode:function(e){switch(e.readability={contentScore:0},e.tagName){case"DIV":e.readability.contentScore+=5;break;case"PRE":case"TD":case"BLOCKQUOTE":e.readability.contentScore+=3;break;case"ADDRESS":case"OL":case"UL":case"DL":case"DD":case"DT":case"LI":case"FORM":e.readability.contentScore-=3;break;case"H1":case"H2":case"H3":case"H4":case"H5":case"H6":case"TH":e.readability.contentScore-=5}e.readability.contentScore+=this._getClassWeight(e)},_removeAndGetNext:function(e){var t=this._getNextNode(e,!0);return e.parentNode.removeChild(e),t},
/**
* Traverse the DOM from node to node, starting at the node passed in.
* Pass true for the second parameter to indicate this node itself
* (and its kids) are going away, and we want the next node over.
*
* Calling this in a loop will traverse the DOM depth-first.
*/
_getNextNode:function(e,t){if(!t&&e.firstElementChild)return e.firstElementChild;if(e.nextElementSibling)return e.nextElementSibling;do{e=e.parentNode}while(e&&!e.nextElementSibling);return e&&e.nextElementSibling},
// compares second text to first one
// 1 = same text, 0 = completely different text
// works the way that it splits both texts into words and then finds words that are unique in second text
// the result is given by the lower length of unique parts
_textSimilarity:function(e,t){var i=e.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean),a=t.toLowerCase().split(this.REGEXPS.tokenize).filter(Boolean);return i.length&&a.length?1-a.filter((e=>!i.includes(e))).join(" ").length/a.join(" ").length:0},_checkByline:function(e,t){if(this._articleByline)return!1;if(void 0!==e.getAttribute)var i=e.getAttribute("rel"),a=e.getAttribute("itemprop");return!(!("author"===i||a&&-1!==a.indexOf("author")||this.REGEXPS.byline.test(t))||!this._isValidByline(e.textContent)||(this._articleByline=e.textContent.trim(),0))},_getNodeAncestors:function(e,t){t=t||0;for(var i=0,a=[];e.parentNode&&(a.push(e.parentNode),!t||++i!==t);)e=e.parentNode;return a},
/***
* grabArticle - Using a variety of metrics (content score, classname, element types), find the content that is
* most likely to be the stuff a user wants to read. Then return it wrapped up in a div.
*
* @param page a document to run upon. Needs to be a full document, complete with body.
* @return Element
**/
_grabArticle:function(e){this.log("**** grabArticle ****");var t=this._doc,i=null!==e;if(!(e=e||this._doc.body))return this.log("No body found in document. Abort."),null;for(var a=e.innerHTML;;){this.log("Starting grabArticle loop");var r=this._flagIsActive(this.FLAG_STRIP_UNLIKELYS),n=[],s=this._doc.documentElement;let V=!0;for(;s;){"HTML"===s.tagName&&(this._articleLang=s.getAttribute("lang"));var l=s.className+" "+s.id;if(this._isProbablyVisible(s))if("true"!=s.getAttribute("aria-modal")||"dialog"!=s.getAttribute("role"))if(this._checkByline(s,l))s=this._removeAndGetNext(s);else if(V&&this._headerDuplicatesTitle(s))this.log("Removing header: ",s.textContent.trim(),this._articleTitle.trim()),V=!1,s=this._removeAndGetNext(s);else{if(r){if(this.REGEXPS.unlikelyCandidates.test(l)&&!this.REGEXPS.okMaybeItsACandidate.test(l)&&!this._hasAncestorTag(s,"table")&&!this._hasAncestorTag(s,"code")&&"BODY"!==s.tagName&&"A"!==s.tagName){this.log("Removing unlikely candidate - "+l),s=this._removeAndGetNext(s);continue}if(this.UNLIKELY_ROLES.includes(s.getAttribute("role"))){this.log("Removing content with role "+s.getAttribute("role")+" - "+l),s=this._removeAndGetNext(s);continue}}if("DIV"!==s.tagName&&"SECTION"!==s.tagName&&"HEADER"!==s.tagName&&"H1"!==s.tagName&&"H2"!==s.tagName&&"H3"!==s.tagName&&"H4"!==s.tagName&&"H5"!==s.tagName&&"H6"!==s.tagName||!this._isElementWithoutContent(s)){if(-1!==this.DEFAULT_TAGS_TO_SCORE.indexOf(s.tagName)&&n.push(s),"DIV"===s.tagName){for(var o=null,h=s.firstChild;h;){var c=h.nextSibling;if(this._isPhrasingContent(h))null!==o?o.appendChild(h):this._isWhitespace(h)||(o=t.createElement("p"),s.replaceChild(o,h),o.appendChild(h));else if(null!==o){for(;o.lastChild&&this._isWhitespace(o.lastChild);)o.removeChild(o.lastChild);o=null}h=c}if(this._hasSingleTagInsideElement(s,"P")&&this._getLinkDensity(s)<.25){var d=s.children[0];s.parentNode.replaceChild(d,s),s=d,n.push(s)}else this._hasChildBlockElement(s)||(s=this._setNodeTag(s,"P"),n.push(s))}s=this._getNextNode(s)}else s=this._removeAndGetNext(s)}else s=this._removeAndGetNext(s);else this.log("Removing hidden node - "+l),s=this._removeAndGetNext(s)}var g=[];this._forEachNode(n,(function(e){if(e.parentNode&&void 0!==e.parentNode.tagName){var t=this._getInnerText(e);if(!(t.length<25)){var i=this._getNodeAncestors(e,5);if(0!==i.length){var a=0;a+=1,a+=t.split(this.REGEXPS.commas).length,a+=Math.min(Math.floor(t.length/100),3),this._forEachNode(i,(function(e,t){if(e.tagName&&e.parentNode&&void 0!==e.parentNode.tagName){if(void 0===e.readability&&(this._initializeNode(e),g.push(e)),0===t)var i=1;else i=1===t?2:3*t;e.readability.contentScore+=a/i}}))}}}}));for(var u=[],m=0,_=g.length;m<_;m+=1){var f=g[m],p=f.readability.contentScore*(1-this._getLinkDensity(f));f.readability.contentScore=p,this.log("Candidate:",f,"with score "+p);for(var N=0;N<this._nbTopCandidates;N++){var E=u[N];if(!E||p>E.readability.contentScore){u.splice(N,0,f),u.length>this._nbTopCandidates&&u.pop();break}}}var b,T=u[0]||null,A=!1;if(null===T||"BODY"===T.tagName){for(T=t.createElement("DIV"),A=!0;e.firstChild;)this.log("Moving child out:",e.firstChild),T.appendChild(e.firstChild);e.appendChild(T),this._initializeNode(T)}else if(T){for(var v=[],y=1;y<u.length;y++)u[y].readability.contentScore/T.readability.contentScore>=.75&&v.push(this._getNodeAncestors(u[y]));if(v.length>=3)for(b=T.parentNode;"BODY"!==b.tagName;){for(var S=0,C=0;C<v.length&&S<3;C++)S+=Number(v[C].includes(b));if(S>=3){T=b;break}b=b.parentNode}T.readability||this._initializeNode(T),b=T.parentNode;for(var L=T.readability.contentScore,x=L/3;"BODY"!==b.tagName;)if(b.readability){var I=b.readability.contentScore;if(I<x)break;if(I>L){T=b;break}L=b.readability.contentScore,b=b.parentNode}else b=b.parentNode;for(b=T.parentNode;"BODY"!=b.tagName&&1==b.children.length;)b=(T=b).parentNode;T.readability||this._initializeNode(T)}var D=t.createElement("DIV");i&&(D.id="readability-content");for(var R=Math.max(10,.2*T.readability.contentScore),P=(b=T.parentNode).children,O=0,w=P.length;O<w;O++){var B=P[O],M=!1;if(this.log("Looking at sibling node:",B,B.readability?"with score "+B.readability.contentScore:""),this.log("Sibling has score",B.readability?B.readability.contentScore:"Unknown"),B===T)M=!0;else{var G=0;if(B.className===T.className&&""!==T.className&&(G+=.2*T.readability.contentScore),B.readability&&B.readability.contentScore+G>=R)M=!0;else if("P"===B.nodeName){var k=this._getLinkDensity(B),H=this._getInnerText(B),U=H.length;(U>80&&k<.25||U<80&&U>0&&0===k&&-1!==H.search(/\.( |$)/))&&(M=!0)}}M&&(this.log("Appending node:",B),-1===this.ALTER_TO_DIV_EXCEPTIONS.indexOf(B.nodeName)&&(this.log("Altering sibling:",B,"to div."),B=this._setNodeTag(B,"DIV")),D.appendChild(B),P=b.children,O-=1,w-=1)}if(this._debug&&this.log("Article content pre-prep: "+D.innerHTML),this._prepArticle(D),this._debug&&this.log("Article content post-prep: "+D.innerHTML),A)T.id="readability-page-1",T.className="page";else{var F=t.createElement("DIV");for(F.id="readability-page-1",F.className="page";D.firstChild;)F.appendChild(D.firstChild);D.appendChild(F)}this._debug&&this.log("Article content after paging: "+D.innerHTML);var W=!0,X=this._getInnerText(D,!0).length;if(X<this._charThreshold)if(W=!1,e.innerHTML=a,this._flagIsActive(this.FLAG_STRIP_UNLIKELYS))this._removeFlag(this.FLAG_STRIP_UNLIKELYS),this._attempts.push({articleContent:D,textLength:X});else if(this._flagIsActive(this.FLAG_WEIGHT_CLASSES))this._removeFlag(this.FLAG_WEIGHT_CLASSES),this._attempts.push({articleContent:D,textLength:X});else if(this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY))this._removeFlag(this.FLAG_CLEAN_CONDITIONALLY),this._attempts.push({articleContent:D,textLength:X});else{if(this._attempts.push({articleContent:D,textLength:X}),this._attempts.sort((function(e,t){return t.textLength-e.textLength})),!this._attempts[0].textLength)return null;D=this._attempts[0].articleContent,W=!0}if(W){var j=[b,T].concat(this._getNodeAncestors(b));return this._someNode(j,(function(e){if(!e.tagName)return!1;var t=e.getAttribute("dir");return!!t&&(this._articleDir=t,!0)})),D}}},
/**
* Check whether the input string could be a byline.
* This verifies that the input is a string, and that the length
* is less than 100 chars.
*
* @param possibleByline {string} - a string to check whether its a byline.
* @return Boolean - whether the input string is a byline.
*/
_isValidByline:function(e){return("string"==typeof e||e instanceof String)&&(e=e.trim()).length>0&&e.length<100},
/**
* Converts some of the common HTML entities in string to their corresponding characters.
*
* @param str {string} - a string to unescape.
* @return string without HTML entity.
*/
_unescapeHtmlEntities:function(e){if(!e)return e;var t=this.HTML_ESCAPE_MAP;return e.replace(/&(quot|amp|apos|lt|gt);/g,(function(e,i){return t[i]})).replace(/&#(?:x([0-9a-z]{1,4})|([0-9]{1,4}));/gi,(function(e,t,i){var a=parseInt(t||i,t?16:10);return String.fromCharCode(a)}))},
/**
* Try to extract metadata from JSON-LD object.
* For now, only Schema.org objects of type Article or its subtypes are supported.
* @return Object with any metadata that could be extracted (possibly none)
*/
_getJSONLD:function(e){var t,i=this._getAllNodesWithTag(e,["script"]);return this._forEachNode(i,(function(e){if(!t&&"application/ld+json"===e.getAttribute("type"))try{var i=e.textContent.replace(/^\s*<!\[CDATA\[|\]\]>\s*$/g,""),a=JSON.parse(i);if(!a["@context"]||!a["@context"].match(/^https?\:\/\/schema\.org$/))return;if(!a["@type"]&&Array.isArray(a["@graph"])&&(a=a["@graph"].find((function(e){return(e["@type"]||"").match(this.REGEXPS.jsonLdArticleTypes)}))),!a||!a["@type"]||!a["@type"].match(this.REGEXPS.jsonLdArticleTypes))return;if(t={},"string"==typeof a.name&&"string"==typeof a.headline&&a.name!==a.headline){var r=this._getArticleTitle(),n=this._textSimilarity(a.name,r)>.75,s=this._textSimilarity(a.headline,r)>.75;t.title=s&&!n?a.headline:a.name}else"string"==typeof a.name?t.title=a.name.trim():"string"==typeof a.headline&&(t.title=a.headline.trim());return a.author&&("string"==typeof a.author.name?t.byline=a.author.name.trim():Array.isArray(a.author)&&a.author[0]&&"string"==typeof a.author[0].name&&(t.byline=a.author.filter((function(e){return e&&"string"==typeof e.name})).map((function(e){return e.name.trim()})).join(", "))),"string"==typeof a.description&&(t.excerpt=a.description.trim()),a.publisher&&"string"==typeof a.publisher.name&&(t.siteName=a.publisher.name.trim()),void("string"==typeof a.datePublished&&(t.datePublished=a.datePublished.trim()))}catch(l){this.log(l.message)}})),t||{}},
/**
* Attempts to get excerpt and byline metadata for the article.
*
* @param {Object} jsonld — object containing any metadata that
* could be extracted from JSON-LD object.
*
* @return Object with optional "excerpt" and "byline" properties
*/
_getArticleMetadata:function(e){var t={},i={},a=this._doc.getElementsByTagName("meta"),r=/\s*(article|dc|dcterm|og|twitter)\s*:\s*(author|creator|description|published_time|title|site_name)\s*/gi,n=/^\s*(?:(dc|dcterm|og|twitter|weibo:(article|webpage))\s*[\.:]\s*)?(author|creator|description|title|site_name)\s*$/i;return this._forEachNode(a,(function(e){var t=e.getAttribute("name"),a=e.getAttribute("property"),s=e.getAttribute("content");if(s){var l=null,o=null;a&&(l=a.match(r))&&(o=l[0].toLowerCase().replace(/\s/g,""),i[o]=s.trim()),!l&&t&&n.test(t)&&(o=t,s&&(o=o.toLowerCase().replace(/\s/g,"").replace(/\./g,":"),i[o]=s.trim()))}})),t.title=e.title||i["dc:title"]||i["dcterm:title"]||i["og:title"]||i["weibo:article:title"]||i["weibo:webpage:title"]||i.title||i["twitter:title"],t.title||(t.title=this._getArticleTitle()),t.byline=e.byline||i["dc:creator"]||i["dcterm:creator"]||i.author,t.excerpt=e.excerpt||i["dc:description"]||i["dcterm:description"]||i["og:description"]||i["weibo:article:description"]||i["weibo:webpage:description"]||i.description||i["twitter:description"],t.siteName=e.siteName||i["og:site_name"],t.publishedTime=e.datePublished||i["article:published_time"]||null,t.title=this._unescapeHtmlEntities(t.title),t.byline=this._unescapeHtmlEntities(t.byline),t.excerpt=this._unescapeHtmlEntities(t.excerpt),t.siteName=this._unescapeHtmlEntities(t.siteName),t.publishedTime=this._unescapeHtmlEntities(t.publishedTime),t},
/**
* Check if node is image, or if node contains exactly only one image
* whether as a direct child or as its descendants.
*
* @param Element
**/
_isSingleImage:function(e){return"IMG"===e.tagName||1===e.children.length&&""===e.textContent.trim()&&this._isSingleImage(e.children[0])},
/**
* Find all <noscript> that are located after <img> nodes, and which contain only one
* <img> element. Replace the first image with the image from inside the <noscript> tag,
* and remove the <noscript> tag. This improves the quality of the images we use on
* some sites (e.g. Medium).
*
* @param Element
**/
_unwrapNoscriptImages:function(e){var t=Array.from(e.getElementsByTagName("img"));this._forEachNode(t,(function(e){for(var t=0;t<e.attributes.length;t++){var i=e.attributes[t];switch(i.name){case"src":case"srcset":case"data-src":case"data-srcset":return}if(/\.(jpg|jpeg|png|webp)/i.test(i.value))return}e.parentNode.removeChild(e)}));var i=Array.from(e.getElementsByTagName("noscript"));this._forEachNode(i,(function(t){var i=e.createElement("div");if(i.innerHTML=t.innerHTML,this._isSingleImage(i)){var a=t.previousElementSibling;if(a&&this._isSingleImage(a)){var r=a;"IMG"!==r.tagName&&(r=a.getElementsByTagName("img")[0]);for(var n=i.getElementsByTagName("img")[0],s=0;s<r.attributes.length;s++){var l=r.attributes[s];if(""!==l.value&&("src"===l.name||"srcset"===l.name||/\.(jpg|jpeg|png|webp)/i.test(l.value))){if(n.getAttribute(l.name)===l.value)continue;var o=l.name;n.hasAttribute(o)&&(o="data-old-"+o),n.setAttribute(o,l.value)}}t.parentNode.replaceChild(i.firstElementChild,a)}}}))},
/**
* Removes script tags from the document.
*
* @param Element
**/
_removeScripts:function(e){this._removeNodes(this._getAllNodesWithTag(e,["script","noscript"]))},
/**
* Check if this node has only whitespace and a single element with given tag
* Returns false if the DIV node contains non-empty text nodes
* or if it contains no element with given tag or more than 1 element.
*
* @param Element
* @param string tag of child element
**/
_hasSingleTagInsideElement:function(e,t){return 1==e.children.length&&e.children[0].tagName===t&&!this._someNode(e.childNodes,(function(e){return e.nodeType===this.TEXT_NODE&&this.REGEXPS.hasContent.test(e.textContent)}))},_isElementWithoutContent:function(e){return e.nodeType===this.ELEMENT_NODE&&0==e.textContent.trim().length&&(0==e.children.length||e.children.length==e.getElementsByTagName("br").length+e.getElementsByTagName("hr").length)},
/**
* Determine whether element has any children block level elements.
*
* @param Element
*/
_hasChildBlockElement:function(e){return this._someNode(e.childNodes,(function(e){return this.DIV_TO_P_ELEMS.has(e.tagName)||this._hasChildBlockElement(e)}))},
/***
* Determine if a node qualifies as phrasing content.
* https://developer.mozilla.org/en-US/docs/Web/Guide/HTML/Content_categories#Phrasing_content
**/
_isPhrasingContent:function(e){return e.nodeType===this.TEXT_NODE||-1!==this.PHRASING_ELEMS.indexOf(e.tagName)||("A"===e.tagName||"DEL"===e.tagName||"INS"===e.tagName)&&this._everyNode(e.childNodes,this._isPhrasingContent)},_isWhitespace:function(e){return e.nodeType===this.TEXT_NODE&&0===e.textContent.trim().length||e.nodeType===this.ELEMENT_NODE&&"BR"===e.tagName},
/**
* Get the inner text of a node - cross browser compatibly.
* This also strips out any excess whitespace to be found.
*
* @param Element
* @param Boolean normalizeSpaces (default: true)
* @return string
**/
_getInnerText:function(e,t){t=void 0===t||t;var i=e.textContent.trim();return t?i.replace(this.REGEXPS.normalize," "):i},
/**
* Get the number of times a string s appears in the node e.
*
* @param Element
* @param string - what to split on. Default is ","
* @return number (integer)
**/
_getCharCount:function(e,t){return t=t||",",this._getInnerText(e).split(t).length-1},
/**
* Remove the style attribute on every e and under.
* TODO: Test if getElementsByTagName(*) is faster.
*
* @param Element
* @return void
**/
_cleanStyles:function(e){if(e&&"svg"!==e.tagName.toLowerCase()){for(var t=0;t<this.PRESENTATIONAL_ATTRIBUTES.length;t++)e.removeAttribute(this.PRESENTATIONAL_ATTRIBUTES[t]);-1!==this.DEPRECATED_SIZE_ATTRIBUTE_ELEMS.indexOf(e.tagName)&&(e.removeAttribute("width"),e.removeAttribute("height"));for(var i=e.firstElementChild;null!==i;)this._cleanStyles(i),i=i.nextElementSibling}},
/**
* Get the density of links as a percentage of the content
* This is the amount of text that is inside a link divided by the total text in the node.
*
* @param Element
* @return number (float)
**/
_getLinkDensity:function(e){var t=this._getInnerText(e).length;if(0===t)return 0;var i=0;return this._forEachNode(e.getElementsByTagName("a"),(function(e){var t=e.getAttribute("href"),a=t&&this.REGEXPS.hashUrl.test(t)?.3:1;i+=this._getInnerText(e).length*a})),i/t},
/**
* Get an elements class/id weight. Uses regular expressions to tell if this
* element looks good or bad.
*
* @param Element
* @return number (Integer)
**/
_getClassWeight:function(e){if(!this._flagIsActive(this.FLAG_WEIGHT_CLASSES))return 0;var t=0;return"string"==typeof e.className&&""!==e.className&&(this.REGEXPS.negative.test(e.className)&&(t-=25),this.REGEXPS.positive.test(e.className)&&(t+=25)),"string"==typeof e.id&&""!==e.id&&(this.REGEXPS.negative.test(e.id)&&(t-=25),this.REGEXPS.positive.test(e.id)&&(t+=25)),t},
/**
* Clean a node of all elements of type "tag".
* (Unless it's a youtube/vimeo video. People love movies.)
*
* @param Element
* @param string tag to clean
* @return void
**/
_clean:function(e,t){var i=-1!==["object","embed","iframe"].indexOf(t);this._removeNodes(this._getAllNodesWithTag(e,[t]),(function(e){if(i){for(var t=0;t<e.attributes.length;t++)if(this._allowedVideoRegex.test(e.attributes[t].value))return!1;if("object"===e.tagName&&this._allowedVideoRegex.test(e.innerHTML))return!1}return!0}))},
/**
* Check if a given node has one of its ancestor tag name matching the
* provided one.
* @param HTMLElement node
* @param String tagName
* @param Number maxDepth
* @param Function filterFn a filter to invoke to determine whether this node 'counts'
* @return Boolean
*/
_hasAncestorTag:function(e,t,i,a){i=i||3,t=t.toUpperCase();for(var r=0;e.parentNode;){if(i>0&&r>i)return!1;if(e.parentNode.tagName===t&&(!a||a(e.parentNode)))return!0;e=e.parentNode,r++}return!1},
/**
* Return an object indicating how many rows and columns this table has.
*/
_getRowAndColumnCount:function(e){for(var t=0,i=0,a=e.getElementsByTagName("tr"),r=0;r<a.length;r++){var n=a[r].getAttribute("rowspan")||0;n&&(n=parseInt(n,10)),t+=n||1;for(var s=0,l=a[r].getElementsByTagName("td"),o=0;o<l.length;o++){var h=l[o].getAttribute("colspan")||0;h&&(h=parseInt(h,10)),s+=h||1}i=Math.max(i,s)}return{rows:t,columns:i}},
/**
* Look for 'data' (as opposed to 'layout') tables, for which we use
* similar checks as
* https://searchfox.org/mozilla-central/rev/f82d5c549f046cb64ce5602bfd894b7ae807c8f8/accessible/generic/TableAccessible.cpp#19
*/
_markDataTables:function(e){for(var t=e.getElementsByTagName("table"),i=0;i<t.length;i++){var a=t[i];if("presentation"!=a.getAttribute("role"))if("0"!=a.getAttribute("datatable"))if(a.getAttribute("summary"))a._readabilityDataTable=!0;else{var r=a.getElementsByTagName("caption")[0];if(r&&r.childNodes.length>0)a._readabilityDataTable=!0;else if(["col","colgroup","tfoot","thead","th"].some((function(e){return!!a.getElementsByTagName(e)[0]})))this.log("Data table because found data-y descendant"),a._readabilityDataTable=!0;else if(a.getElementsByTagName("table")[0])a._readabilityDataTable=!1;else{var n=this._getRowAndColumnCount(a);n.rows>=10||n.columns>4?a._readabilityDataTable=!0:a._readabilityDataTable=n.rows*n.columns>10}}else a._readabilityDataTable=!1;else a._readabilityDataTable=!1}},
/* convert images and figures that have properties like data-src into images that can be loaded without JS */
_fixLazyImages:function(e){this._forEachNode(this._getAllNodesWithTag(e,["img","picture","figure"]),(function(e){if(e.src&&this.REGEXPS.b64DataUrl.test(e.src)){if("image/svg+xml"===this.REGEXPS.b64DataUrl.exec(e.src)[1])return;for(var t=!1,i=0;i<e.attributes.length;i++){var a=e.attributes[i];if("src"!==a.name&&/\.(jpg|jpeg|png|webp)/i.test(a.value)){t=!0;break}}if(t){var r=e.src.search(/base64\s*/i)+7;e.src.length-r<133&&e.removeAttribute("src")}}if(!(e.src||e.srcset&&"null"!=e.srcset)||-1!==e.className.toLowerCase().indexOf("lazy"))for(var n=0;n<e.attributes.length;n++)if("src"!==(a=e.attributes[n]).name&&"srcset"!==a.name&&"alt"!==a.name){var s=null;if(/\.(jpg|jpeg|png|webp)\s+\d/.test(a.value)?s="srcset":/^\s*\S+\.(jpg|jpeg|png|webp)\S*\s*$/.test(a.value)&&(s="src"),s)if("IMG"===e.tagName||"PICTURE"===e.tagName)e.setAttribute(s,a.value);else if("FIGURE"===e.tagName&&!this._getAllNodesWithTag(e,["img","picture"]).length){var l=this._doc.createElement("img");l.setAttribute(s,a.value),e.appendChild(l)}}}))},_getTextDensity:function(e,t){var i=this._getInnerText(e,!0).length;if(0===i)return 0;var a=0,r=this._getAllNodesWithTag(e,t);return this._forEachNode(r,(e=>a+=this._getInnerText(e,!0).length)),a/i},
/**
* Clean an element of all tags of type "tag" if they look fishy.
* "Fishy" is an algorithm based on content length, classnames, link density, number of images & embeds, etc.
*
* @return void
**/
_cleanConditionally:function(e,t){this._flagIsActive(this.FLAG_CLEAN_CONDITIONALLY)&&this._removeNodes(this._getAllNodesWithTag(e,[t]),(function(e){var i=function(e){return e._readabilityDataTable},a="ul"===t||"ol"===t;if(!a){var r=0,n=this._getAllNodesWithTag(e,["ul","ol"]);this._forEachNode(n,(e=>r+=this._getInnerText(e).length)),a=r/this._getInnerText(e).length>.9}if("table"===t&&i(e))return!1;if(this._hasAncestorTag(e,"table",-1,i))return!1;if(this._hasAncestorTag(e,"code"))return!1;var s=this._getClassWeight(e);if(this.log("Cleaning Conditionally",e),s+0<0)return!0;if(this._getCharCount(e,",")<10){for(var l=e.getElementsByTagName("p").length,o=e.getElementsByTagName("img").length,h=e.getElementsByTagName("li").length-100,c=e.getElementsByTagName("input").length,d=this._getTextDensity(e,["h1","h2","h3","h4","h5","h6"]),g=0,u=this._getAllNodesWithTag(e,["object","embed","iframe"]),m=0;m<u.length;m++){for(var _=0;_<u[m].attributes.length;_++)if(this._allowedVideoRegex.test(u[m].attributes[_].value))return!1;if("object"===u[m].tagName&&this._allowedVideoRegex.test(u[m].innerHTML))return!1;g++}var f=this._getLinkDensity(e),p=this._getInnerText(e).length,N=o>1&&l/o<.5&&!this._hasAncestorTag(e,"figure")||!a&&h>l||c>Math.floor(l/3)||!a&&d<.9&&p<25&&(0===o||o>2)&&!this._hasAncestorTag(e,"figure")||!a&&s<25&&f>.2||s>=25&&f>.5||1===g&&p<75||g>1;if(a&&N){for(var E=0;E<e.children.length;E++)if(e.children[E].children.length>1)return N;if(o==e.getElementsByTagName("li").length)return!1}return N}return!1}))},
/**
* Clean out elements that match the specified conditions
*
* @param Element
* @param Function determines whether a node should be removed
* @return void
**/
_cleanMatchedNodes:function(e,t){for(var i=this._getNextNode(e,!0),a=this._getNextNode(e);a&&a!=i;)a=t.call(this,a,a.className+" "+a.id)?this._removeAndGetNext(a):this._getNextNode(a)},
/**
* Clean out spurious headers from an Element.
*
* @param Element
* @return void
**/
_cleanHeaders:function(e){let t=this._getAllNodesWithTag(e,["h1","h2"]);this._removeNodes(t,(function(e){let t=this._getClassWeight(e)<0;return t&&this.log("Removing header with low class weight:",e),t}))},
/**
* Check if this node is an H1 or H2 element whose content is mostly
* the same as the article title.
*
* @param Element the node to check.
* @return boolean indicating whether this is a title-like header.
*/
_headerDuplicatesTitle:function(e){if("H1"!=e.tagName&&"H2"!=e.tagName)return!1;var t=this._getInnerText(e,!1);return this.log("Evaluating similarity of header:",t,this._articleTitle),this._textSimilarity(this._articleTitle,t)>.75},_flagIsActive:function(e){return(this._flags&e)>0},_removeFlag:function(e){this._flags=this._flags&~e},_isProbablyVisible:function(e){return(!e.style||"none"!=e.style.display)&&(!e.style||"hidden"!=e.style.visibility)&&!e.hasAttribute("hidden")&&(!e.hasAttribute("aria-hidden")||"true"!=e.getAttribute("aria-hidden")||e.className&&e.className.indexOf&&-1!==e.className.indexOf("fallback-image"))},
/**
* Runs readability.
*
* Workflow:
* 1. Prep the document by removing script tags, css, etc.
* 2. Build readability's DOM tree.
* 3. Grab the article content from the current dom tree.
* 4. Replace the current DOM tree with the new one.
* 5. Read peacefully.
*
* @return void
**/
parse:function(){if(this._maxElemsToParse>0){var e=this._doc.getElementsByTagName("*").length;if(e>this._maxElemsToParse)throw new Error("Aborting parsing document; "+e+" elements found")}this._unwrapNoscriptImages(this._doc);var t=this._disableJSONLD?{}:this._getJSONLD(this._doc);this._removeScripts(this._doc),this._prepDocument();var i=this._getArticleMetadata(t);this._articleTitle=i.title;var a=this._grabArticle();if(!a)return null;if(this.log("Grabbed: "+a.innerHTML),this._postProcessContent(a),!i.excerpt){var r=a.getElementsByTagName("p");r.length>0&&(i.excerpt=r[0].textContent.trim())}var n=a.textContent;return{title:this._articleTitle,byline:i.byline||this._articleByline,dir:this._articleDir,lang:this._articleLang,content:this._serializer(a),textContent:n,length:n.length,excerpt:i.excerpt,siteName:i.siteName||this._articleSiteName,publishedTime:i.publishedTime}}},e.exports=t}(t)),t.exports),l=(i||(i=1,function(){var e={
// NOTE: These two regular expressions are duplicated in
// Readability.js. Please keep both copies in sync.
unlikelyCandidates:/-ad-|ai2html|banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|footer|gdpr|header|legends|menu|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote/i,okMaybeItsACandidate:/and|article|body|column|content|main|shadow/i};function t(e){return(!e.style||"none"!=e.style.display)&&!e.hasAttribute("hidden")&&(!e.hasAttribute("aria-hidden")||"true"!=e.getAttribute("aria-hidden")||e.className&&e.className.indexOf&&-1!==e.className.indexOf("fallback-image"))}n.exports=function(i,a={}){"function"==typeof a&&(a={visibilityChecker:a});var r={minScore:20,minContentLength:140,visibilityChecker:t};a=Object.assign(r,a);var n=i.querySelectorAll("p, pre, article"),s=i.querySelectorAll("div > br");if(s.length){var l=new Set(n);[].forEach.call(s,(function(e){l.add(e.parentNode)})),n=Array.from(l)}var o=0;return[].some.call(n,(function(t){if(!a.visibilityChecker(t))return!1;var i=t.className+" "+t.id;if(e.unlikelyCandidates.test(i)&&!e.okMaybeItsACandidate.test(i))return!1;if(t.matches("li p"))return!1;var r=t.textContent.trim().length;return!(r<a.minContentLength)&&(o+=Math.sqrt(r-a.minContentLength))>a.minScore}))}}()),n.exports);return a={Readability:s,isProbablyReaderable:l}}();export{s as r};
|