File size: 1,998 Bytes
89cd5d5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
from typing import Optional, Callable
import logging
logger = logging.getLogger(__name__)
logging.basicConfig(encoding='utf-8', level=logging.DEBUG)
from src.vectordb.helpers import read_docs, read_listings, preprocess_df
from src.vectordb.schema import WikivoyageDocuments, WikivoyageListings
from src.vectordb.helpers import set_uri
import lancedb


def _create_table_and_ingest_data(table_name: str, schema: object, data_fetcher: Callable,
                                  preprocessor: Optional[Callable] = None):
    """
    Rebuild a single LanceDB table from freshly fetched data.

    The data is loaded (and optionally preprocessed) first; only afterwards is
    any table already stored under `table_name` dropped, recreated with
    `schema`, and filled with the loaded rows.

    Args:
        - table_name: str, name of the table to (re)create.
        - schema: object, schema handed to `create_table`.
        - data_fetcher: Callable, zero-argument function returning the data;
          the result must offer `to_dict('records')`.
        - preprocessor: Optional[Callable], applied to the fetched data before
          ingestion when provided (default is None).
    """
    connection = lancedb.connect(set_uri())
    logger.info(f"Connected to DB. Reading data for table {table_name} now...")

    # Fetch first, then run the optional preprocessing step on the result.
    frame = data_fetcher()
    frame = preprocessor(frame) if preprocessor else frame

    logger.info(f"Finished reading data for {table_name}, attempting to create table and ingest the data...")

    # Start from a clean slate: a missing table is not an error here.
    connection.drop_table(table_name, ignore_missing=True)

    # Rows are passed as a list of per-record dicts.
    connection.create_table(table_name, schema=schema).add(frame.to_dict('records'))
    logger.info(f"Completed ingestion for {table_name}.")


def create_wikivoyage_docs_db_and_add_data():
    """
    Build the `wikivoyage_documents` table and load it with data.

    Delegates to `_create_table_and_ingest_data`, using the
    `WikivoyageDocuments` schema, `read_docs` as the data source and
    `preprocess_df` as the preprocessing step.
    """
    _create_table_and_ingest_data(
        "wikivoyage_documents",
        WikivoyageDocuments,
        read_docs,
        preprocessor=preprocess_df,
    )


def create_wikivoyage_listings_db_and_add_data():
    """
    Build the `wikivoyage_listings` table and load it with data.

    Delegates to `_create_table_and_ingest_data`, using the
    `WikivoyageListings` schema and `read_listings` as the data source;
    no preprocessing step is supplied.
    """
    _create_table_and_ingest_data(
        "wikivoyage_listings",
        WikivoyageListings,
        read_listings,
    )