Spaces:
Runtime error
Runtime error
import wikipediaapi | |
from .utils import Utils | |
from.rake import Rake | |
class Subject(Utils, Rake):
    """Wikipedia-backed subject with keyword extraction over the page text.

    Wraps a ``wikipediaapi`` page for ``subject`` and exposes its links,
    URL, summary and text. The page object is created on first use and
    cached on the instance in ``wikipedia_object``.
    """

    def __init__(self, subject, bs_parser="lxml"):
        """Store the subject and build the English Wikipedia client.

        Args:
            subject: Page title later passed to ``self.wikipedia.page``.
            bs_parser: Parser name kept on the instance. It is not used
                inside this class (presumably consumed by ``Utils`` --
                TODO confirm).
        """
        Utils.__init__(self, latin_encoder="latin-1")
        Rake.__init__(self)
        self.subject = subject
        self.bs_parser = bs_parser
        # NOTE(review): recent wikipedia-api releases appear to require a
        # ``user_agent`` argument here -- confirm against the pinned version.
        self.wikipedia = wikipediaapi.Wikipedia(
            language='en',
            extract_format=wikipediaapi.ExtractFormat.WIKI,
        )
        # Lazily loaded by _get_wiki_object().
        self.wikipedia_object = None

    def _get_wiki_object(self):
        """Return the page object for ``self.subject``, creating it once.

        The result is cached in ``self.wikipedia_object`` so that later
        calls reuse the same object.
        """
        if self.wikipedia_object is None:
            self.wikipedia_object = self.wikipedia.page(self.subject)
        return self.wikipedia_object

    def _get_links(self):
        """Return the ``links`` attribute of the page object."""
        return self._get_wiki_object().links

    def _get_url(self):
        """Return the ``fullurl`` attribute of the page object."""
        return self._get_wiki_object().fullurl

    def _check_page_exists(self):
        """Return the result of the page object's ``exists()`` call."""
        return self._get_wiki_object().exists()

    def _get_summary(self):
        """Return the ``summary`` attribute of the page object."""
        return self._get_wiki_object().summary

    def _get_content(self):
        """Return the ``text`` attribute of the page object."""
        return self._get_wiki_object().text

    # def _get_html(self):
    #     if not self.wikipedia_object:
    #         self._get_wiki_object()
    #     return self.wikipedia_object.html

    def get_meta(self):
        """Store the page URL and title in ``self.meta`` and return it.

        Returns:
            The ``meta`` mapping, with ``"url"`` and ``"title"`` set from
            the page object's ``fullurl`` and ``title`` attributes.
        """
        page = self._get_wiki_object()
        # ``meta`` is never created in this class; presumably
        # ``Utils.__init__`` provides it (TODO confirm). Fall back to a
        # fresh dict so this method does not fail if it is absent.
        meta = getattr(self, "meta", None)
        if meta is None:
            meta = self.meta = {}
        meta["url"] = page.fullurl
        meta["title"] = page.title
        return meta

    def get_top_keywords_with_score_from_rake(self):
        """Return ``get_keyphrases_with_score`` applied to the page text.

        ``get_keyphrases_with_score`` is not defined in this class; it is
        presumably inherited from ``Rake`` (verify against that module).
        """
        return self.get_keyphrases_with_score(self._get_content())

    def get_top_keywords_from_rake(self):
        """Return ``get_keyphrases`` applied to the page text.

        ``get_keyphrases`` is not defined in this class; it is presumably
        inherited from ``Rake`` (verify against that module).
        """
        return self.get_keyphrases(self._get_content())