# connectwithprakash's picture
# Add file_loader tool for loading various file types
# 8fa2c1a
import os
import pandas as pd
from typing import Any, Dict, List, Union
from langchain_community.document_loaders import (
TextLoader,
PyPDFLoader,
UnstructuredWordDocumentLoader,
UnstructuredPowerPointLoader,
)
from langchain_core.tools import tool
def load_file(file_path: str) -> str:
    """Load a file and return its content as a single string.

    Tabular files (.csv, .xlsx, .xls) are read with pandas and rendered as a
    plain-text table. Document files (.txt, .pdf, .docx, .doc, .pptx, .ppt)
    are read with the matching LangChain loader, and the text of every
    document the loader returns is joined with blank lines.

    Args:
        file_path: Path to the file. The extension (case-insensitive)
            selects how the file is read.

    Returns:
        str: Content of the file, or "No content found in the file" when a
            document loader returns no documents.

    Raises:
        ValueError: If file type is unsupported.
    """
    ext = os.path.splitext(file_path)[-1].lower()

    if ext in (".csv", ".xlsx", ".xls"):
        df = pd.read_csv(file_path) if ext == ".csv" else pd.read_excel(file_path)
        # Return text, not the DataFrame itself, so the declared ``str``
        # return type actually holds for tabular files.
        return df.to_string()

    if ext == ".txt":
        loader = TextLoader(file_path, encoding="utf8")
    elif ext == ".pdf":
        loader = PyPDFLoader(file_path)
    elif ext in (".docx", ".doc"):
        loader = UnstructuredWordDocumentLoader(file_path)
    elif ext in (".pptx", ".ppt"):
        loader = UnstructuredPowerPointLoader(file_path)
    else:
        raise ValueError(f"Unsupported file extension: {ext}")

    docs = loader.load()
    if not docs:
        return "No content found in the file"
    # A loader may return several documents (PyPDFLoader, for instance,
    # yields one per page); keep all of them instead of only the first.
    return "\n\n".join(doc.page_content for doc in docs)
@tool
def file_loader_tool(file_path: str) -> str:
    """Loads a file (csv, xlsx, txt, pdf, docx, etc.) and returns its content as a string.
    Args:
    file_path (str): Path to the file to load.
    """
    # NOTE: the @tool decorator exposes the docstring above as the tool's
    # description, so its wording is kept exactly as it was.
    # All of the actual loading is delegated to load_file.
    return load_file(file_path)
if __name__ == "__main__":
    # Manual smoke test: run the tool on two sample files, in order, and
    # print whatever each invocation returns.
    for sample_path in (
        "7bd855d8-463d-4ed5-93ca-5fe35145f733.xlsx",
        "test.txt",
    ):
        print(file_loader_tool.invoke(sample_path))