Spaces:
Runtime error
Runtime error
File size: 1,525 Bytes
9f23e0b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
import wikipediaapi
class Article:
    """Collect the lower-cased text of an English Wikipedia article by section.

    ``article_data`` maps a section title (plus the special key ``'Summary'``)
    to a list of "documents": the section text lower-cased and split on
    newlines.
    """

    def __init__(self, article_name):
        """Create a page handle for ``article_name``.

        Args:
            article_name: Title of the Wikipedia page to look up.
        """
        self.article_data = {}
        # NOTE(review): newer Wikipedia-API releases appear to expect a
        # descriptive user-agent string as the first argument -- confirm
        # against the version pinned for this project.
        self.article = wikipediaapi.Wikipedia('en').page(article_name)

    def article_exists(self) -> bool:
        """Return True if the page exists, False otherwise.

        Any error raised while checking is treated as "does not exist"
        (best-effort lookup), so this method never raises for ordinary
        failures and always returns a real ``bool``.
        """
        try:
            return bool(self.article.exists())
        except Exception:
            return False

    def get_sections_and_texts(self, sections) -> None:
        """Recursively store section texts in ``self.article_data``.

        On the first call the page summary is stored under ``'Summary'``
        (an empty list when the page has no summary).  Each section with
        non-empty text is then stored under its title, and its subsections
        are visited depth-first.

        Args:
            sections: Iterable of section objects exposing ``title``,
                ``text`` and ``sections`` attributes.
        """
        if 'Summary' not in self.article_data:
            summary = self.article.summary
            # Always a list so every value in article_data has the same type.
            self.article_data['Summary'] = (
                summary.lower().split('\n') if summary else []
            )

        for section in sections:
            if section.text:
                # Keyed by title: a later section with the same title
                # overwrites an earlier one.
                self.article_data[section.title] = section.text.lower().split('\n')
            if section.sections:
                self.get_sections_and_texts(section.sections)

    def remove_empty_sections(self) -> None:
        """Drop empty strings from every document list in ``article_data``.

        Keys are kept even if their list ends up empty.
        """
        for title, docs in self.article_data.items():
            # Build a new list instead of calling remove() while iterating,
            # which skips the element following each removal and leaves
            # consecutive empty strings behind.  Re-assigning an existing key
            # does not resize the dict, so this is safe during iteration.
            self.article_data[title] = [doc for doc in docs if doc]

    def get_article_data(self) -> dict:
        """Gather, clean and summarise the article text.

        Returns:
            A dict with three keys: ``'article_data'`` (title -> list of
            documents), ``'num_docs'`` (total number of documents) and
            ``'avg_doc_len'`` (mean whitespace-separated word count per
            document; ``0.0`` when there are no documents).
        """
        self.get_sections_and_texts(self.article.sections)
        self.remove_empty_sections()

        all_docs = [
            doc for docs in self.article_data.values() for doc in docs
        ]
        num_docs = len(all_docs)
        total_words = sum(len(doc.split()) for doc in all_docs)
        # Guard against an article with no text at all.
        avg_doc_len = total_words / num_docs if num_docs else 0.0

        return {
            'article_data': self.article_data,
            'num_docs': num_docs,
            'avg_doc_len': avg_doc_len,
        }