#!/usr/bin/env python3
# Copyright 2017-present, Facebook, Inc.
# All rights reserved.
#
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
"""Documents, in a sqlite database."""

import sqlite3
from contextlib import closing

from . import utils
from . import DEFAULTS


class DocDB(object):
    """Sqlite backed document storage.

    Reads from a ``documents`` table using its ``id``, ``text``, ``title``
    and ``intro`` columns. Instances can be used as context managers; the
    connection is closed when the ``with`` block exits.

    Implements get_doc_text(doc_id).
    """

    def __init__(self, db_path=None):
        """Open a connection to the sqlite file backing this store.

        Args:
            db_path: Path to the sqlite database file. When falsy (e.g.
                ``None``), ``DEFAULTS['db_path']`` is used instead.
        """
        # Stored under a private name: assigning to ``self.path`` here would
        # shadow the ``path`` accessor defined on the class.
        self._path = db_path or DEFAULTS['db_path']
        # check_same_thread=False lifts sqlite3's restriction that only the
        # creating thread may use the connection.
        self.connection = sqlite3.connect(self._path, check_same_thread=False)

    def __enter__(self):
        """Return this instance for use in a ``with`` statement."""
        return self

    def __exit__(self, *args):
        """Close the connection on leaving the ``with`` block.

        Returns None, so an exception raised inside the block propagates.
        """
        self.close()

    @property
    def path(self):
        """Return the path to the file that backs this database."""
        return self._path

    def close(self):
        """Close the connection to the database."""
        self.connection.close()

    def _fetch_one(self, query, doc_id):
        """Run a single-column, single-row lookup keyed on a document id.

        Args:
            query: SQL statement with exactly one ``?`` placeholder for the
                document id.
            doc_id: Document id; passed through ``utils.normalize`` before
                being bound to the placeholder.

        Returns:
            The first column of the first matching row, or None when no row
            matches.
        """
        # closing() guarantees the cursor is released even if execute() or
        # fetchone() raises.
        with closing(self.connection.cursor()) as cursor:
            cursor.execute(query, (utils.normalize(doc_id),))
            row = cursor.fetchone()
        return None if row is None else row[0]

    def get_doc_ids(self):
        """Fetch all ids of docs stored in the db.

        Returns:
            A list with the ``id`` value of every row in ``documents``.
        """
        with closing(self.connection.cursor()) as cursor:
            cursor.execute("SELECT id FROM documents")
            return [row[0] for row in cursor.fetchall()]

    def get_doc_text(self, doc_id):
        """Fetch the raw text of the doc for 'doc_id'.

        Returns None when no document has that id.
        """
        return self._fetch_one(
            "SELECT text FROM documents WHERE id = ?", doc_id
        )

    def get_doc_title(self, doc_id):
        """Fetch the title of the doc for 'doc_id'.

        Returns None when no document has that id.
        """
        return self._fetch_one(
            "SELECT title FROM documents WHERE id = ?", doc_id
        )

    def get_doc_intro(self, doc_id):
        """Fetch the intro of the doc for 'doc_id'.

        The ``intro`` column holds the introduction of the Wikipedia page.
        Returns None when no document has that id.
        """
        return self._fetch_one(
            "SELECT intro FROM documents WHERE id = ?", doc_id
        )