import datetime as dt
import pandas as pd
import itertools
import pymongo  # requires dnspython

from .mongo_credentials import credentials

CREDENTIALS = credentials
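
# Expected shape of the imported credentials mapping (illustrative values only;
# the keys below are the ones this module reads, the real values live in
# mongo_credentials.py):
#   credentials = {
#       'user': 'db_user',
#       'password': 'db_password',
#       'host': 'cluster0.example.mongodb.net',
#       'port': 27017,
#       'dbname': 'my_database',
#   }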


class _MongoAbstract(object):
    _instance = None
    _credentials = None
    _name = None
    _key_ls = None
    _del_many_limit = 50
    _override_boolean_key = None

    def __init__(self, instance='remote', **query):
        self._set_instance(instance)
        self._set_credentials()
        if not isinstance(self._name, str) or not isinstance(self._instance, str) or not isinstance(self._key_ls, list):
            raise Exception('Instance (str), collection _name (str) and _key_ls (list) need to be properly declared!')
        db_obj = self._get_db_object()
        collection_names_ls = db_obj.list_collection_names()
        idx_tup_ls = [(i, pymongo.ASCENDING) for i in self._key_ls]
        if self._name not in collection_names_ls:
            print(f"Creating new {self._instance} collection: {self._name}\n")
            collection = db_obj[self._name]  # this will create the collection
            collection.create_index(idx_tup_ls, unique=True, name=self._name)  # unique index on the key fields
        else:
            collection = db_obj[self._name]  # collection already exists
        self._db_obj = db_obj
        self._collection = collection
        query = self._override_boolean_keys(**query)
        self._is_adv_query = any(isinstance(v, list) for v in query.values())
        self._set_query(query)
        self._set_advanced_query()
        self._out = None
        self._cursor = self._get_cursor()
        self._full_key_ls = self._key_ls + ['values']

    def _set_credentials(self):
        if self._credentials is None:
            self._credentials = CREDENTIALS

    def _get_credentials(self):
        if self._credentials is None:
            self._set_credentials()
        return self._credentials

    def _set_instance(self, instance):
        self._instance = instance

    def _override_boolean_keys(self, **query):
        if self._get_boolean_keys() and query:
            for key in self._get_boolean_keys():
                if key in query.keys():
                    if isinstance(query[key], (tuple, list)):
                        query[key] = [str(int(i)) if isinstance(i, bool) else i for i in query[key]]
                    else:
                        if isinstance(query[key], bool):
                            query[key] = str(int(query[key]))
        return query

    def _get_boolean_keys(self):
        if self._override_boolean_key is not None:
            if isinstance(self._override_boolean_key, str):
                return [self._override_boolean_key]
            elif isinstance(self._override_boolean_key, list):
                return self._override_boolean_key
            else:
                raise Exception(f'_override_boolean_key not recognized {self._override_boolean_key}')
        else:
            return None
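
    # Illustrative behaviour, assuming a subclass sets _override_boolean_key='active':
    #   _override_boolean_keys(active=True)          -> {'active': '1'}
    #   _override_boolean_keys(active=[True, False]) -> {'active': ['1', '0']}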

    def __repr__(self):
        print_str = f'database: {self._get_db_name()}'
        print_str += f'\ninstance: {self._get_instance()}'
        print_str += f'\ncollection: {self._get_name()}'
        key_str = ', '.join([i for i in self.get_keys() if i != 'values'])
        print_str += f'\nkeys: [{key_str}]'
        storage_size = self._db_obj.command('collstats', self._get_name())['size'] / 1e6
        print_str += f'\nstorage size: {storage_size:.1f}MB'
        num_docs = self._collection.count_documents(self._query)  # Cursor.count() was removed in pymongo 4
        print_str += f'\ndocuments: {num_docs}'
        return print_str

    def _get_collection(self):
        return self._collection

    def _get_instance(self):
        return self._instance

    def _get_name(self):
        return self._name

    def _get_host_and_port(self):
        user = self._get_credentials()['user']
        password = self._get_credentials()['password']
        host = self._get_credentials()['host']
        port = self._get_credentials()['port']
        # mongodb+srv URIs take no explicit port (SRV lookup supplies it);
        # the path segment names the default database
        full_hostname = f"mongodb+srv://{user}:{password}@{host}/{self._get_db_name()}"
        return full_hostname, port

    def _get_db_object(self):
        client = pymongo.MongoClient(host=self._get_host_and_port()[0])
        return client[self._get_db_name()]

    def _get_db_name(self):
        return self._get_credentials()['dbname']

    def _set_advanced_query(self):
        if self._is_adv_query:
            and_ls = []
            ls_dd = self._product_dict(**self._query)
            for dd in ls_dd:
                and_ls.append({'$and': [dd]})
            or_dd = {'$or': and_ls}
            self._query = or_dd

    def _get_query(self):
        return self._query

    def _get_cursor(self):
        if self._out is None:
            self._cursor = self._collection.find(self._query)
        return self._cursor

    @staticmethod
    def _product_dict(**kwargs):
        # called as self._product_dict(...), so it must be a staticmethod
        # make sure advanced-query values are all lists
        for key, val in kwargs.items():
            if not isinstance(val, list):
                kwargs.update({key: [val]})
        keys = kwargs.keys()
        vals = kwargs.values()
        for instance in itertools.product(*vals):
            yield dict(zip(keys, instance))
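
    # Illustrative expansion (key names are hypothetical):
    #   _product_dict(ticker=['AAPL', 'MSFT'], field='close')
    #     -> {'ticker': 'AAPL', 'field': 'close'}
    #        {'ticker': 'MSFT', 'field': 'close'}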

    def _set_query(self, query):
        query_dd = dict()
        for k, v in query.items():
            if v is not None:  # skip None values so they do not constrain the query
                if isinstance(v, str) and '~' in v:
                    query_dd[k] = {'$ne': v.replace('~', '')}
                elif isinstance(v, list):
                    new_ls = list()
                    neg_ls = list()
                    for i in v:
                        if isinstance(i, str) and '~' in i:
                            neg_ls.append(i.replace('~', ''))
                        else:
                            new_ls.append(i)
                    if len(neg_ls) > 0:
                        new_ls.append({'$nin': neg_ls})
                    query_dd[k] = new_ls
                else:
                    query_dd[k] = v
        self._query = query_dd
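
    # Illustrative '~' negation syntax (key names are hypothetical):
    #   _set_query({'ticker': '~AAPL'})           -> {'ticker': {'$ne': 'AAPL'}}
    #   _set_query({'ticker': ['MSFT', '~AAPL']}) -> {'ticker': ['MSFT', {'$nin': ['AAPL']}]}
    # List values are subsequently expanded into an $or of $and clauses by
    # _set_advanced_query / _product_dict.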

    def drop_collection(self, confirm=''):
        if confirm != 'confirm':
            print('drop_collection not confirmed. Make sure you know what you are doing. Pass "confirm" to drop.')
        else:
            db_obj = self._get_db_object()
            db_obj.drop_collection(self._name)
            print('Database: %s' % self._get_db_name())
            print('Collection dropped: %s' % self._name)

    def _reindex(self, confirm=''):
        if confirm != 'confirm':
            print('reindex not confirmed. Nothing done. Pass "confirm" to reindex.')
        else:
            collection = self._get_collection()
            collection.drop_indexes()
            idx_tup_ls = [(i, pymongo.ASCENDING) for i in self._key_ls]
            collection.create_index(idx_tup_ls, unique=True, name=self._name)  # rebuild the unique index
            print('collection %s successfully re-indexed' % self._name)

    def count(self):
        print(f'Database: {self._get_db_name()}')
        print(f'Collection: {self._name}')
        print(f'Query: {self._query}')
        print(f'Documents: {self._collection.count_documents(self._query)}')

    def replace_field_value(self, existing_field=True, **kwargs):
        """
        Make sure you declare the appropriate key=value argument to be
        modified in your query!
        """
        if len(kwargs) != 1:
            raise Exception('please rename only one field at a time')
        field = list(kwargs.keys())[0]
        value = kwargs[field]
        if field not in self.get_keys() and existing_field:  # field must be declared in _key_ls so re-indexing works
            raise Exception('new field needs to be declared in collection keys before replace / re-index')
        if field == 'values':
            raise Exception('values cannot be replaced with this method. Use .update_values() instead')
        if existing_field:
            if not self._query:
                raise Exception('Cannot replace key value without filters.')
        if self._get_boolean_keys() and field in self._get_boolean_keys():
            value = str(int(value))
        cursor = self._get_cursor()
        request_ls = []
        for doc in cursor:
            if existing_field:
                doc.pop(field)
            update_dd = {'$set': {field: value}}
            request = pymongo.UpdateOne(doc, update_dd, upsert=False)
            request_ls.append(request)
        if len(request_ls) > 0:
            self._get_collection().bulk_write(request_ls)
            print(f'collection field {field} successfully updated to {value}')
            if not existing_field:
                self._reindex('confirm')
        else:
            print('no operations to handle')

    def get_keys(self):
        return [i for i in self._key_ls]  # pass a copy

    def get_key_values(self):
        match_dd = {'$match': self._query}
        key_dd = {k: f'${k}' for k in self._key_ls}
        group_dd = {'$group': {'_id': key_dd}}
        cursor = self._collection.aggregate([match_dd, group_dd])
        ls = [i['_id'] for i in list(cursor)]
        keys_df = pd.DataFrame(ls)
        if not keys_df.empty:
            keys_df = keys_df.sort_values(by=self._key_ls)
            keys_df = keys_df.reset_index(drop=True)
        return keys_df

    def _is_local_instance(self):
        return self._get_instance() == 'local'

    def delete_many(self, confirm=''):
        if not self._query:
            raise Exception('Cannot delete_many without filters. Use drop_collection() instead for a full drop.')
        lmt = self._del_many_limit
        cnt = self._collection.count_documents(self._query)
        if (cnt > lmt) and confirm != 'confirm':
            raise Exception(f'confirm delete_many() if more than {lmt} documents. Potentially deleted: {cnt}')
        before = self._collection.count_documents({})
        self._get_collection().delete_many(self._query)
        after = self._collection.count_documents({})
        self._out = None
        print(f'{before - after} document(s) deleted')

    def distinct(self, field):
        if isinstance(field, str):
            cursor = self._get_cursor()
            return cursor.distinct(field)  # faster single-field path
        elif isinstance(field, list):
            group_dd = {"_id": {i: f'${i}' for i in field}}
            if len(self._query) > 0:
                cursor = self._collection.aggregate([{'$match': self._query},
                                                     {"$group": group_dd}])
            else:
                cursor = self._collection.aggregate([{"$group": group_dd}])
            dd = [doc['_id'] for doc in cursor]
            return pd.DataFrame(dd)
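

# A minimal sketch of how a concrete collection would be declared and queried
# (class and key names here are hypothetical; the subclasses below follow the
# same pattern):
#
#   class PricesSeries(_MongoSeries):
#       _name = 'prices'               # collection name
#       _key_ls = ['ticker', 'field']  # fields of the unique compound index
#
#   db = PricesSeries(instance='remote', ticker=['AAPL', '~MSFT'])
#   df = db.query(start='20200101', end='20201231')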


class _MongoSeries(_MongoAbstract):
    _drop_weekends = None

    def __init__(self, instance='remote', **query):
        _MongoAbstract.__init__(self, instance=instance, **query)

    def _get_drop_weekends(self):
        return self._drop_weekends

    def _update_query_date_range(self, start, end):
        if start is not None and end is None:
            self._query['values.date'] = {'$gte': start}
        elif start is None and end is not None:
            self._query['values.date'] = {'$lte': end}
        elif start is not None and end is not None:
            self._query['values.date'] = {'$gte': start, '$lte': end}
        else:  # both None
            pass

    def _get_query_cursor(self, start, end):
        if start is None and end is None:
            return self._get_cursor()  # simple case
        else:
            co = self._get_collection()
            key_ls = self.get_keys()
            match_dd = {'$match': self._query}
            if start is not None and end is None:
                if isinstance(start, (dt.datetime, dt.date)):
                    start = start.strftime('%Y%m%d')
                start = start.replace('-', '')
                filter_dd = {
                    '$filter':
                        {'input': '$values',
                         'as': 'vals',
                         'cond': {'$gte': ['$$vals.date', start]}
                         }
                }
            elif start is None and end is not None:
                if isinstance(end, (dt.datetime, dt.date)):
                    end = end.strftime('%Y%m%d')
                end = end.replace('-', '')
                filter_dd = {
                    '$filter':
                        {'input': '$values',
                         'as': 'vals',
                         'cond': {'$lte': ['$$vals.date', end]}
                         }
                }
            else:  # both defined
                if isinstance(start, (dt.datetime, dt.date)):
                    start = start.strftime('%Y%m%d')
                start = start.replace('-', '')
                if isinstance(end, (dt.datetime, dt.date)):
                    end = end.strftime('%Y%m%d')
                end = end.replace('-', '')
                filter_dd = {
                    '$filter':
                        {'input': '$values',
                         'as': 'vals',
                         'cond': {'$and': [{'$gte': ['$$vals.date', start]},
                                           {'$lte': ['$$vals.date', end]}]}
                         }
                }
            key_dd = {i: 1 for i in key_ls}
            key_dd['values'] = filter_dd
            project_dd = {'$project': key_dd}
            cursor = co.aggregate([match_dd, project_dd])
            return cursor
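
    # Resulting pipeline shape for e.g. start='20200101' (a sketch; key names
    # are hypothetical):
    #   [{'$match': {'ticker': 'AAPL'}},
    #    {'$project': {'ticker': 1,
    #                  'values': {'$filter': {'input': '$values', 'as': 'vals',
    #                                         'cond': {'$gte': ['$$vals.date', '20200101']}}}}}]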

    def insert(self, pd_obj, col_key):
        if pd_obj.empty:
            print('pd_obj empty, no data to insert')
            return
        if isinstance(pd_obj, pd.Series):
            pd_obj = pd.DataFrame(pd_obj)
        # check no duplicated columns
        if pd_obj.columns.has_duplicates:
            raise Exception('inserted object cannot have duplicated keys!')
        # initial checks
        if isinstance(col_key, str):
            col_key = [col_key]
        if len(self._key_ls) != len(col_key):
            name = self._name
            full = len(self._key_ls)
            given = len(col_key)
            raise Exception(f'insert error: {name}. col_key must have {full} elements, got {given}')
        df = pd_obj.copy()
        df.index = df.index.map(lambda i: i.strftime('%Y%m%d'))
        # integrity check: each column label must split into one value per key
        for col in pd_obj.columns:
            col_val_ls = col.split('.')
            if len(self._full_key_ls) - 1 != len(col_val_ls):
                raise Exception(f'all columns must have {len(col_key)} elements, got {col_val_ls}')
        # do not store full NaN rows or columns
        df = df.dropna(how='all', axis=0)
        df = df.dropna(how='all', axis=1)
        request_ls = []
        for col in df.columns:
            col_val_ls = col.split('.')
            tup = zip(col_key, col_val_ls)
            query_dd = {k: v for k, v in tup}
            values_dd = df[col].dropna().to_dict()
            values_ls = [{'date': k, 'value': v} for k, v in values_dd.items()]
            update_dd = {'$set': {'values': values_ls}}
            request = pymongo.UpdateOne(query_dd, update_dd, upsert=True)
            request_ls.append(request)
        self._get_collection().bulk_write(request_ls)
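
    # Illustrative insert: column labels encode the key values, '.'-separated
    # (class and key names are hypothetical):
    #   df = pd.DataFrame({'AAPL.close': [1.0, 2.0]},
    #                     index=pd.to_datetime(['2020-01-01', '2020-01-02']))
    #   PricesSeries(instance='remote').insert(df, col_key=['ticker', 'field'])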

    def last_update(self, how='each', order='min'):
        match_dd = {'$match': self._query}
        key_dd = {k: f'${k}' for k in self._key_ls}
        group_dd = {
            '$group':
                {'_id':
                     {'_id': key_dd},
                 'max': {'$max': "$values.date"}
                 }
        }
        sort_dd = {'$sort': {'values.date': 1}}
        cursor = self._get_collection().aggregate([match_dd, group_dd, sort_dd])
        last_df = pd.DataFrame()
        for doc in cursor:
            dd = doc['_id']['_id']
            # '$values.date' resolves to the per-document date array; take its
            # last entry (assumes values are stored in date order)
            dd['last_update'] = doc['max'][-1]
            tmp_df = pd.DataFrame.from_dict(dd, orient='index').T
            last_df = pd.concat([last_df, tmp_df], axis=0, sort=True)
        last_df = last_df[self._key_ls + ['last_update']]
        last_df['last_update'] = last_df['last_update'].apply(pd.to_datetime)
        days_f = lambda x: (dt.datetime.today() - x).days
        last_df['last_update_days'] = last_df['last_update'].apply(days_f)
        if how == 'all':
            if order == 'max':
                return last_df['last_update'].max()
            elif order == 'min':
                return last_df['last_update'].min()
            else:
                raise Exception(f'argument order not recognized {order}')
        elif how == 'each':
            return last_df
        else:
            raise Exception(f'argument how not recognized: {how}')

    def query(self, start=None, end=None, rename=None, expand=False):
        """
        Query the collection into a DataFrame. Results are cached in
        self._out; repeated calls return the cached object.
        """
        if self._out is None:
            cursor = self._get_query_cursor(start, end)
            if rename is not None:
                if isinstance(rename, str):
                    rename = [rename]
            exclude_ls = ['_id', 'values']
            df = pd.DataFrame()
            for doc in cursor:
                if len(doc['values']) > 0:
                    if rename is not None:
                        name = '.'.join([doc[i] for i in rename])
                    else:
                        name = '.'.join([doc[i] for i in sorted(doc.keys()) if i not in exclude_ls])
                    doc_df = pd.DataFrame(doc['values']).set_index('date')
                    doc_df.columns = [name]
                    df = pd.concat([df, doc_df], axis=1, sort=True)
            if df.empty:
                return df
            df.index = df.index.map(pd.to_datetime)
            if self._get_drop_weekends():
                week_days_ls = df.index.weekday < 5
                df = df.loc[week_days_ls].copy()
            if expand:
                df.columns = df.columns.str.split('.', expand=True)
            # return a Series if only one column, a scalar/row if only one row
            if len(df.columns) == 1:
                df = df[df.columns[0]].copy()
            if len(df.index) == 1:
                df = df.iloc[-1].copy()
            self._out = df.copy()
        return self._out

    def drop_datapoint(self, date_str):
        if self._collection.count_documents(self._query) == 0:
            raise Exception('no documents found with given filters')
        if self._collection.count_documents(self._query) > 1:
            raise Exception('you can only drop datapoints one series at a time')
        if isinstance(date_str, dt.datetime):
            date_str = date_str.strftime('%Y%m%d')
        cursor = self._get_cursor()
        request_ls = []
        for doc in cursor:
            doc.pop('_id')
            doc.pop('values')
            doc['values.date'] = date_str  # match the array element by date
            update_dd = {'$unset': {'values.$.value': ""}}  # positional: unset the matched element's value
            request = pymongo.UpdateOne(doc, update_dd, upsert=False)
            request_ls.append(request)
        self._get_collection().bulk_write(request_ls)

    def update_values(self, pd_obj, col_key):
        if pd_obj.empty:
            print('pd_obj empty, no data to update')
            return
        # initial checks
        if isinstance(col_key, str):
            col_key = [col_key]
        if len(self._key_ls) != len(col_key):
            name = self._name
            full = len(self._key_ls)
            given = len(col_key)
            raise Exception(f'update error: {name}. col_key must have {full} elements, got {given}')
        if isinstance(pd_obj, pd.Series):
            df = pd.DataFrame(pd_obj)
        else:
            df = pd_obj.copy()
        # check no duplicated columns
        if df.columns.has_duplicates:
            raise Exception('inserted object cannot have duplicated keys!')
        # integrity check: each column label must split into one value per key
        for col in df.columns:
            col_val_ls = col.split('.')
            if len(self._full_key_ls) - 1 != len(col_val_ls):
                raise Exception(f'all columns must have {len(col_key)} elements, got {col_val_ls}')
        df.index = df.index.map(lambda i: i.strftime('%Y%m%d'))
        df = df.dropna(axis=0, how='all')
        df = df.dropna(axis=1, how='all')
        request_ls = []
        for col in df.columns:
            col_val_ls = col.split('.')
            tup = zip(col_key, col_val_ls)
            query_dd = {k: v for k, v in tup}
            values_dd = df[col].dropna().to_dict()
            # pull many: remove any existing entries for the affected dates
            date_ls = list(values_dd.keys())
            update_dd = {
                '$pull': {'values': {'date': {'$in': date_ls}}}
            }
            request = pymongo.UpdateMany(query_dd, update_dd, upsert=True)
            request_ls.append(request)
            # push many: append the new {'date', 'value'} pairs
            new_ls = [{'date': k, 'value': v} for k, v in values_dd.items()]
            update_dd = {'$push': {'values': {'$each': new_ls}}}
            request = pymongo.UpdateMany(query_dd, update_dd, upsert=True)
            request_ls.append(request)
        self._get_collection().bulk_write(request_ls, ordered=True)
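
    # Note on the pull-then-push pattern above: ordered=True guarantees that,
    # per column, the $pull removing stale entries for the affected dates runs
    # before the $push appending the new ones, so re-running the same update
    # leaves one entry per date.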


class _MongoDoc(_MongoAbstract):

    def __init__(self, instance='remote', **query):
        _MongoAbstract.__init__(self, instance=instance, **query)

    def insert(self, dd):
        if not isinstance(dd, dict) or not dd:
            print('insert dict missing or empty, no data to insert')
            return
        # initial checks
        if sorted(list(self._get_query().keys())) != sorted(self.get_keys()):
            raise Exception(f'specify a full query to insert into a _MongoDoc! Keys are: {str(self.get_keys())}')
        cast_key_ls = set([str(k) for k in dd.keys()])
        if len(cast_key_ls) != len(dd.keys()):
            raise Exception('identical string representations of keys are not allowed')
        insert_dd = {str(k): v for k, v in dd.items()}
        filter_dd = self._get_query()
        update_dd = {'$set': {'values': insert_dd}}
        self._get_collection().update_one(filter_dd, update=update_dd, upsert=True)

    def query(self, expand=False, rename=None):
        """
        Query the document into a DataFrame/Series. Results are cached in
        self._out; repeated calls return the cached object.
        """
        if rename:
            if isinstance(rename, str):
                rename = [rename]
        if self._out is None:
            cursor = self._get_cursor()  # simple case
            df = pd.DataFrame()
            for doc in cursor:
                if doc['values']:
                    doc.pop('_id')
                    val_dd = doc.pop('values')
                    if rename:
                        name = '.'.join([str(doc[k]) for k in rename])
                    else:
                        name = '.'.join([str(doc[k]) for k in sorted(doc.keys())])
                    s = pd.Series(val_dd, name=name, dtype=object)
                    df = pd.concat([df, s], axis=1, sort=True)
            if expand:
                df.columns = df.columns.str.split('.', expand=True)
            # return a Series if only one column, a scalar/row if only one row
            if len(df.columns) == 1:
                df = df[df.columns[0]].copy()
            if len(df.index) == 1:
                df = df.iloc[-1].copy()
            self._out = df
        return self._out
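
# Illustrative _MongoDoc round-trip (class and key names are hypothetical):
#   class Meta(_MongoDoc):
#       _name = 'meta'
#       _key_ls = ['ticker']
#
#   Meta(ticker='AAPL').insert({'sector': 'tech', 'listed': True})
#   Meta(ticker='AAPL').query()  # -> pd.Series of the stored values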


class _MongoTable(_MongoAbstract):

    def __init__(self, instance='remote', **query):
        _MongoAbstract.__init__(self, instance=instance, **query)

    def insert(self, pd_obj):
        if pd_obj.empty:
            print('pd_obj empty, no data to insert')
            return
        if isinstance(pd_obj, pd.Series):
            pd_obj = pd.DataFrame(pd_obj)
        # check no duplicated columns
        if pd_obj.columns.has_duplicates:
            raise Exception('inserted object cannot have duplicated columns!')
        # initial checks
        if sorted(list(self._get_query().keys())) != sorted(self.get_keys()):
            raise Exception(f'specify a full query to insert into a _MongoTable! Keys are: {str(self.get_keys())}')
        # drop NaN-only rows and columns
        df = pd_obj.copy()
        df = df.dropna(how='all', axis=0)
        df = df.dropna(how='all', axis=1)
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = ['.'.join([i for i in col]) for col in df.columns]
        if isinstance(df.index, pd.MultiIndex):
            df.index = ['.'.join([i for i in idx]) for idx in df.index]
        df.columns = df.columns.map(str)
        df.index = df.index.map(str)
        values_dd = df.to_dict(orient='records')
        filter_dd = self._get_query()
        update_dd = {'$set': {'values': values_dd}}
        self._get_collection().update_one(filter_dd, update=update_dd, upsert=True)

    def query(self, rename=None, expand=False):
        """
        Query the table into a DataFrame. Results are cached in self._out;
        repeated calls return the cached object.
        """
        if self._out is None:
            cursor = self._get_cursor()  # simple case
            if rename is not None:
                if isinstance(rename, str):
                    rename = [rename]
            exclude_ls = ['_id', 'values']
            out_df = pd.DataFrame()
            cnt = 0
            for doc in cursor:
                if len(doc['values']) > 0:
                    df = pd.DataFrame.from_records(doc['values'])
                    if rename is not None:
                        name = '.'.join([doc[i] for i in rename])
                    else:
                        name = '.'.join([doc[i] for i in sorted(doc.keys()) if i not in exclude_ls])
                    id_ls = [name] * df.shape[0]
                    df['_id'] = id_ls
                    out_df = pd.concat([out_df, df], axis=0, sort=True)
                    cnt += 1
            if out_df.empty:
                return out_df
            if cnt > 1:
                out_df = out_df.reset_index(drop=False)
                out_df = out_df.set_index('_id', drop=True)
                out_df.index.name = None
            else:
                out_df = out_df.drop('_id', axis=1)
            if expand:
                out_df.index = out_df.index.str.split('.', expand=True)
            # return a Series if only one column, a scalar/row if only one row
            if len(out_df.columns) == 1:
                out_df = out_df[out_df.columns[0]].copy()
            if len(out_df.index) == 1:
                out_df = out_df.iloc[-1].copy()
            self._out = out_df.copy()
        return self._out
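
# Illustrative _MongoTable round-trip (class and key names are hypothetical):
#   class Report(_MongoTable):
#       _name = 'reports'
#       _key_ls = ['name']
#
#   Report(name='q1').insert(pd.DataFrame({'a': [1, 2], 'b': [3, 4]}))
#   Report(name='q1').query()  # -> DataFrame rebuilt from the stored records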


class _MongoLog(_MongoAbstract):

    def __init__(self, instance='remote', **query):
        _MongoAbstract.__init__(self, instance=instance, **query)

    def _log(self, df):
        request_ls = []
        if not df.empty:
            # expects a DataFrame indexed by a (section, iso_date) MultiIndex
            for (section, iso_dt), row_s in df.iterrows():
                values_dd = row_s.to_dict()
                doc_id = {'section': section,
                          'iso_date': iso_dt}
                update_dd = {'$push': {'values': values_dd}}
                request = pymongo.UpdateOne(doc_id, update_dd, upsert=True)
                request_ls.append(request)
            # keep bulk_write inside the guard: it raises on an empty request list
            self._get_collection().bulk_write(request_ls)

    def query(self, rename=None, expand=False):
        """
        Query the log into a DataFrame. Results are cached in self._out;
        repeated calls return the cached object.
        """
        if self._out is None:
            cursor = self._get_cursor()  # simple case
            if rename is not None:
                if isinstance(rename, str):
                    rename = [rename]
            exclude_ls = ['_id', 'values']
            df = pd.DataFrame()
            for doc in cursor:
                if len(doc['values']) > 0:
                    if rename is not None:
                        name = '.'.join([doc[i] for i in rename])
                    else:
                        name = '.'.join([doc[i] for i in sorted(doc.keys()) if i not in exclude_ls])
                    doc_df = pd.DataFrame.from_records(doc['values'])
                    doc_df.index = [name] * len(doc_df.index)
                    df = pd.concat([df, doc_df], axis=0, sort=True)
            if df.empty:
                return df
            if expand:
                df.index = df.index.str.split('.', expand=True)
            # return a Series if only one column, a scalar/row if only one row
            if len(df.columns) == 1:
                df = df[df.columns[0]].copy()
            if len(df.index) == 1:
                df = df.iloc[-1].copy()
            self._out = df.copy()
        return self._out
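
# Illustrative logging call (class name is hypothetical; _log expects a
# DataFrame indexed by a (section, iso_date) MultiIndex):
#   class RunLog(_MongoLog):
#       _name = 'run_log'
#       _key_ls = ['section', 'iso_date']
#
#   idx = pd.MultiIndex.from_tuples([('etl', '2020-01-01T00:00:00')])
#   RunLog(instance='remote')._log(pd.DataFrame({'status': ['ok']}, index=idx))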