Spaces:
Runtime error
Runtime error
import csv | |
import json | |
from pathlib import Path | |
from typing import Optional, List, Union, Any, Dict | |
from phi.tools import Toolkit | |
from phi.utils.log import logger | |
class CsvTools(Toolkit): | |
def __init__( | |
self, | |
csvs: Optional[List[Union[str, Path]]] = None, | |
row_limit: Optional[int] = None, | |
read_csvs: bool = True, | |
list_csvs: bool = True, | |
query_csvs: bool = True, | |
read_column_names: bool = True, | |
duckdb_connection: Optional[Any] = None, | |
duckdb_kwargs: Optional[Dict[str, Any]] = None, | |
): | |
super().__init__(name="csv_tools") | |
self.csvs: List[Path] = [] | |
if csvs: | |
for _csv in csvs: | |
if isinstance(_csv, str): | |
self.csvs.append(Path(_csv)) | |
elif isinstance(_csv, Path): | |
self.csvs.append(_csv) | |
else: | |
raise ValueError(f"Invalid csv file: {_csv}") | |
self.row_limit = row_limit | |
self.duckdb_connection: Optional[Any] = duckdb_connection | |
self.duckdb_kwargs: Optional[Dict[str, Any]] = duckdb_kwargs | |
if read_csvs: | |
self.register(self.read_csv_file) | |
if list_csvs: | |
self.register(self.list_csv_files) | |
if read_column_names: | |
self.register(self.get_columns) | |
if query_csvs: | |
try: | |
import duckdb # noqa: F401 | |
except ImportError: | |
raise ImportError("`duckdb` not installed. Please install using `pip install duckdb`.") | |
self.register(self.query_csv_file) | |
def list_csv_files(self) -> str: | |
"""Returns a list of available csv files | |
Returns: | |
str: List of available csv files | |
""" | |
return json.dumps([_csv.stem for _csv in self.csvs]) | |
def read_csv_file(self, csv_name: str, row_limit: Optional[int] = None) -> str: | |
"""Use this function to read the contents of a csv file `name` without the extension. | |
Args: | |
csv_name (str): The name of the csv file to read without the extension. | |
row_limit (Optional[int]): The number of rows to return. None returns all rows. Defaults to None. | |
Returns: | |
str: The contents of the csv file if successful, otherwise returns an error message. | |
""" | |
try: | |
if csv_name not in [_csv.stem for _csv in self.csvs]: | |
return f"File: {csv_name} not found, please use one of {self.list_csv_files()}" | |
logger.info(f"Reading file: {csv_name}") | |
file_path = [_csv for _csv in self.csvs if _csv.stem == csv_name][0] | |
# Read the csv file | |
csv_data = [] | |
_row_limit = row_limit or self.row_limit | |
with open(str(file_path), newline="") as csvfile: | |
reader = csv.DictReader(csvfile) | |
if _row_limit is not None: | |
csv_data = [row for row in reader][:_row_limit] | |
else: | |
csv_data = [row for row in reader] | |
return json.dumps(csv_data) | |
except Exception as e: | |
logger.error(f"Error reading csv: {e}") | |
return f"Error reading csv: {e}" | |
def get_columns(self, csv_name: str) -> str: | |
"""Use this function to get the columns of the csv file `csv_name` without the extension. | |
Args: | |
csv_name (str): The name of the csv file to get the columns from without the extension. | |
Returns: | |
str: The columns of the csv file if successful, otherwise returns an error message. | |
""" | |
try: | |
if csv_name not in [_csv.stem for _csv in self.csvs]: | |
return f"File: {csv_name} not found, please use one of {self.list_csv_files()}" | |
logger.info(f"Reading columns from file: {csv_name}") | |
file_path = [_csv for _csv in self.csvs if _csv.stem == csv_name][0] | |
# Get the columns of the csv file | |
with open(str(file_path), newline="") as csvfile: | |
reader = csv.DictReader(csvfile) | |
columns = reader.fieldnames | |
return json.dumps(columns) | |
except Exception as e: | |
logger.error(f"Error getting columns: {e}") | |
return f"Error getting columns: {e}" | |
def query_csv_file(self, csv_name: str, sql_query: str) -> str: | |
"""Use this function to run a SQL query on csv file `csv_name` without the extension. | |
The Table name is the name of the csv file without the extension. | |
The SQL Query should be a valid DuckDB SQL query. | |
Args: | |
csv_name (str): The name of the csv file to query | |
sql_query (str): The SQL Query to run on the csv file. | |
Returns: | |
str: The query results if successful, otherwise returns an error message. | |
""" | |
try: | |
import duckdb | |
if csv_name not in [_csv.stem for _csv in self.csvs]: | |
return f"File: {csv_name} not found, please use one of {self.list_csv_files()}" | |
# Load the csv file into duckdb | |
logger.info(f"Loading csv file: {csv_name}") | |
file_path = [_csv for _csv in self.csvs if _csv.stem == csv_name][0] | |
# Create duckdb connection | |
con = self.duckdb_connection | |
if not self.duckdb_connection: | |
con = duckdb.connect(**(self.duckdb_kwargs or {})) | |
if con is None: | |
logger.error("Error connecting to DuckDB") | |
return "Error connecting to DuckDB, please check the connection." | |
# Create a table from the csv file | |
con.execute(f"CREATE TABLE {csv_name} AS SELECT * FROM read_csv_auto('{file_path}')") | |
# -*- Format the SQL Query | |
# Remove backticks | |
formatted_sql = sql_query.replace("`", "") | |
# If there are multiple statements, only run the first one | |
formatted_sql = formatted_sql.split(";")[0] | |
# -*- Run the SQL Query | |
logger.info(f"Running query: {formatted_sql}") | |
query_result = con.sql(formatted_sql) | |
result_output = "No output" | |
if query_result is not None: | |
try: | |
results_as_python_objects = query_result.fetchall() | |
result_rows = [] | |
for row in results_as_python_objects: | |
if len(row) == 1: | |
result_rows.append(str(row[0])) | |
else: | |
result_rows.append(",".join(str(x) for x in row)) | |
result_data = "\n".join(result_rows) | |
result_output = ",".join(query_result.columns) + "\n" + result_data | |
except AttributeError: | |
result_output = str(query_result) | |
logger.debug(f"Query result: {result_output}") | |
return result_output | |
except Exception as e: | |
logger.error(f"Error querying csv: {e}") | |
return f"Error querying csv: {e}" | |