import os
from typing import Any, Dict, List, Union

import pandas as pd
from langchain_community.document_loaders import (
    TextLoader,
    PyPDFLoader,
    UnstructuredWordDocumentLoader,
    UnstructuredPowerPointLoader,
)
from langchain_core.tools import tool

# Extensions that are parsed with pandas rather than a LangChain loader.
_TABULAR_EXTENSIONS = (".csv", ".xlsx", ".xls")


def _build_document_loader(ext: str, file_path: str):
    """Return the LangChain loader for *ext*, or ``None`` if none applies."""
    if ext == ".txt":
        return TextLoader(file_path, encoding="utf8")
    if ext == ".pdf":
        return PyPDFLoader(file_path)
    if ext in (".docx", ".doc"):
        return UnstructuredWordDocumentLoader(file_path)
    if ext in (".pptx", ".ppt"):
        return UnstructuredPowerPointLoader(file_path)
    return None


def load_file(file_path: str) -> str:
    """Load a file and return its content as a string.

    Tabular files (csv, xlsx, xls) are read with pandas and serialised as
    CSV text without the index column. Text-like files (txt, pdf, docx,
    doc, pptx, ppt) are read with the matching LangChain loader, and the
    ``page_content`` of every returned document is joined with blank lines.

    Args:
        file_path: Path to the file.

    Returns:
        str: Content of the file, or ``"No content found in the file"`` when
        the loader yields no documents.

    Raises:
        ValueError: If file type is unsupported.
    """
    ext = os.path.splitext(file_path)[-1].lower()

    if ext in _TABULAR_EXTENSIONS:
        df = pd.read_csv(file_path) if ext == ".csv" else pd.read_excel(file_path)
        # Serialise instead of returning the DataFrame itself, so the
        # declared ``-> str`` contract actually holds for callers.
        return df.to_csv(index=False)

    loader = _build_document_loader(ext, file_path)
    if loader is None:
        raise ValueError(f"Unsupported file extension: {ext}")

    docs = loader.load()
    if not docs:
        return "No content found in the file"

    # A loader may return several documents (e.g. one per PDF page); keep
    # all of them rather than silently dropping everything after the first.
    return "\n\n".join(doc.page_content for doc in docs)


# NOTE: the docstring below doubles as the tool description exposed through
# the @tool decorator, so its wording is part of the runtime behaviour.
@tool
def file_loader_tool(file_path: str) -> str:
    """Loads a file (csv, xlsx, txt, pdf, docx, etc.) and returns its content as a string.

    Args:
        file_path (str): Path to the file to load.
    """
    doc = load_file(file_path)
    return doc


if __name__ == "__main__":
    # Manual smoke test against two local sample files.
    print(file_loader_tool.invoke("7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx"))
    print(file_loader_tool.invoke("test.txt"))