# wiki-chat / Article.py
# Uploaded by: Pennywise881
# Commit message: "uploaded code files"
# Commit: 9f23e0b
import wikipediaapi
class Article:
    """Wrap a Wikipedia page and expose its text as per-section documents.

    ``article_data`` maps a section title (plus the special key
    ``'Summary'``) to a list of "documents": the lowercased section text
    split on newlines.
    """

    def __init__(self, article_name: str) -> None:
        """Create a page handle for ``article_name`` on English Wikipedia.

        Args:
            article_name: Title of the Wikipedia article to look up.
        """
        # Section title -> list of lowercased lines of that section's text.
        self.article_data = {}
        # NOTE(review): some wikipediaapi releases appear to expect a
        # user-agent string as the first constructor argument — confirm
        # against the version pinned by this project.
        self.article = wikipediaapi.Wikipedia('en').page(article_name)

    def article_exists(self) -> bool:
        """Return True if the page exists, False otherwise.

        Any error raised while checking (for example a failed lookup) is
        treated as "does not exist" so callers always receive a real bool.
        """
        try:
            return bool(self.article.exists())
        except Exception:
            # Best-effort check: deliberately broad, but unlike a bare
            # ``except`` it lets KeyboardInterrupt/SystemExit propagate.
            return False

    def get_sections_and_texts(self, sections) -> None:
        """Recursively collect section texts into ``self.article_data``.

        The summary is stored once under ``'Summary'`` (an empty list when
        the page has none). Each section with non-empty text is stored
        under its title; a later section with the same title overwrites an
        earlier one.

        Args:
            sections: Iterable of section objects exposing ``title``,
                ``text`` and ``sections`` attributes.
        """
        if 'Summary' not in self.article_data:
            summary = self.article.summary
            # Use an empty list (not '') so every value has the same type.
            self.article_data['Summary'] = (
                summary.lower().split('\n') if summary else []
            )

        for section in sections:
            if section.text:
                self.article_data[section.title] = (
                    section.text.lower().split('\n')
                )
            # Descend even when the section itself carries no text.
            if section.sections:
                self.get_sections_and_texts(section.sections)

    def remove_empty_sections(self) -> None:
        """Drop empty-string documents from every section's document list.

        Section keys are kept even if their list ends up empty.
        """
        # Rebuild each list rather than calling ``remove`` while iterating,
        # which skips elements and leaves consecutive empty strings behind.
        for title in list(self.article_data):
            self.article_data[title] = [
                doc for doc in self.article_data[title] if doc
            ]

    def get_article_data(self) -> dict:
        """Collect the article's documents and basic corpus statistics.

        Returns:
            A dict with keys ``'article_data'`` (section title -> list of
            documents), ``'num_docs'`` (total number of documents) and
            ``'avg_doc_len'`` (mean whitespace-separated token count per
            document; ``0.0`` when there are no documents).
        """
        self.get_sections_and_texts(self.article.sections)
        self.remove_empty_sections()

        all_docs = [
            doc for docs in self.article_data.values() for doc in docs
        ]
        num_docs = len(all_docs)
        total_tokens = sum(len(doc.split()) for doc in all_docs)
        # Guard against division by zero for pages with no usable text.
        avg_doc_len = total_tokens / num_docs if num_docs else 0.0

        return {
            'article_data': self.article_data,
            'num_docs': num_docs,
            'avg_doc_len': avg_doc_len,
        }