Spaces:

jiangjiechen
/

loren-fact-checking

Build error

loren-fact-checking / src /er_client /doc_retrieval_by_api.py

init loren for spaces

7f7285f over 3 years ago

1.17 kB

	# -*- coding: utf-8 -*-

	'''
	@Author : Jiangjie Chen
	@Time : 2020/11/12 21:19
	@Contact : [email protected]
	@Description:
	'''

	import wikipediaapi
	import nltk
	from nltk.tokenize import sent_tokenize
	nltk.download('punkt')
	try:
	    # Plain import: resolves when the directory holding entitylinker is
	    # directly importable (presumably when this file is run as a script).
	    from entitylinker import ELClient
	except ImportError:
	    # Only a failed import triggers the package-relative fallback; other
	    # exceptions (e.g. KeyboardInterrupt) are no longer swallowed.
	    from .entitylinker import ELClient


	class DocRetrieval:
	    """Collect Wikipedia summary sentences for the entities linked in a claim.

	    A claim is handed to the entity-linking client; for every linked entity
	    that carries a page title, the summary of that Wikipedia page is split
	    into sentences and returned as flat tuples.
	    """

	    def __init__(self, link_type):
	        """Create the Wikipedia client and the entity-linking client.

	        Args:
	            link_type: Forwarded unchanged to ``ELClient`` (presumably the
	                name of the entity-linking backend -- confirm in ELClient).
	        """
	        # NOTE(review): newer wikipedia-api releases reportedly expect a user
	        # agent in this constructor -- confirm against the pinned version.
	        self.wiki = wikipediaapi.Wikipedia('en')
	        self.er_client = ELClient(link_type, verbose=True)

	    def _get_page(self, title):
	        """Split the summary of the page named ``title`` into sentences.

	        Args:
	            title: Wikipedia page title to look up.

	        Returns:
	            A list of ``(title, sentence_index, sentence, 0)`` tuples, one per
	            sentence returned by ``sent_tokenize``; the last field is always 0.
	        """
	        summary = self.wiki.page(title).summary
	        return [
	            (title, idx, sentence, 0)
	            for idx, sentence in enumerate(sent_tokenize(summary))
	        ]

	    def retrieve_docs(self, claim):
	        """Return summary sentences for every titled entity linked in ``claim``.

	        Args:
	            claim: Text passed to the entity linker.

	        Returns:
	            The concatenation, in linker order, of the ``_get_page`` results
	            for each linked entity whose title is non-empty.
	        """
	        sents = []
	        for _text, _label, _kb_id, title in self.er_client.link(claim):
	            # Entities without a usable page title ('' or None) are skipped
	            # so they never reach the Wikipedia lookup.
	            if not title:
	                continue
	            sents.extend(self._get_page(title))
	        return sents


	if __name__ == '__main__':
	    # Manual smoke run: retrieve and print the sentences for the same claim
	    # written in two different casings, using the 'tagme' link type.
	    retriever = DocRetrieval('tagme')
	    for sample_claim in (
	        'joe biden won the U.S. president.',
	        'Joe Biden won the U.S. president.',
	    ):
	        print(retriever.retrieve_docs(sample_claim))