Spaces:
Running
Running
File size: 1,998 Bytes
89cd5d5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
from typing import Optional, Callable
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
from src.vectordb.helpers import read_docs, read_listings, preprocess_df
from src.vectordb.schema import WikivoyageDocuments, WikivoyageListings
from src.vectordb.helpers import set_uri
import lancedb
def _create_table_and_ingest_data(
    table_name: str,
    schema: object,
    data_fetcher: Callable,
    preprocessor: Optional[Callable] = None,
):
    """
    Rebuild a single database table from freshly fetched data.

    The data is fetched (and optionally preprocessed) first; only then is a
    table with the same name dropped and re-created, so a failure while
    reading the data leaves the previous table in place.

    Args:
    - table_name: str, name of the table to (re)create.
    - schema: object, schema handed to ``create_table``.
    - data_fetcher: Callable, zero-argument function returning the data; the
      result must expose ``to_dict('records')`` (presumably a pandas
      DataFrame -- confirm against the helpers module).
    - preprocessor: Optional[Callable], applied to the fetched data when
      truthy; its return value replaces the fetched data (default is None).
    """
    connection = lancedb.connect(set_uri())
    logger.info(f"Connected to DB. Reading data for table {table_name} now...")

    frame = data_fetcher()
    frame = preprocessor(frame) if preprocessor else frame
    logger.info(f"Finished reading data for {table_name}, attempting to create table and ingest the data...")

    # Start from a clean slate: drop the old table (if any), then re-create
    # it from the schema and load one record per row.
    connection.drop_table(table_name, ignore_missing=True)
    connection.create_table(table_name, schema=schema).add(frame.to_dict('records'))
    logger.info(f"Completed ingestion for {table_name}.")
def create_wikivoyage_docs_db_and_add_data():
    """
    Build the "wikivoyage_documents" table and load it.

    Data comes from ``read_docs`` and is passed through ``preprocess_df``
    before being stored under the ``WikivoyageDocuments`` schema.
    """
    # Positional order: table_name, schema, data_fetcher, preprocessor.
    _create_table_and_ingest_data(
        "wikivoyage_documents",
        WikivoyageDocuments,
        read_docs,
        preprocess_df,
    )
def create_wikivoyage_listings_db_and_add_data():
    """
    Build the "wikivoyage_listings" table and load it.

    Data comes from ``read_listings`` and is stored as-is (no preprocessing
    step) under the ``WikivoyageListings`` schema.
    """
    # Positional order: table_name, schema, data_fetcher.
    _create_table_and_ingest_data(
        "wikivoyage_listings",
        WikivoyageListings,
        read_listings,
    )