import os

from dotenv import load_dotenv
import google.generativeai as genai
import gradio as gr
from langchain.document_loaders import (
    PyPDFLoader,
    TextLoader,
    Docx2txtLoader,
)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain_google_genai import GoogleGenerativeAIEmbeddings

from src.agent import build_qa_chain

load_dotenv()
genai.configure(api_key=os.getenv("GOOGLE_API_KEY"))


class AgentChain:
    """Holds the QA chain and the vector store shared across the Gradio callbacks."""

    def __init__(self):
        self.agent = None
        self.db = None


agent_chain = AgentChain()
agent_chain.agent = build_qa_chain()


def extract_text_from_files(docs):
    """Load every supported file (.pdf, .docx/.doc, .txt) found in a directory."""
    documents = []
    files = os.listdir(docs)
    if len(files) == 0:
        return "Directory is empty"
    for file in files:
        file_path = os.path.join(docs, file)
        if file.endswith(".pdf"):
            loader = PyPDFLoader(file_path)
            documents.extend(loader.load())
        elif file.endswith(".docx") or file.endswith(".doc"):
            loader = Docx2txtLoader(file_path)
            documents.extend(loader.load())
        elif file.endswith(".txt"):
            loader = TextLoader(file_path)
            documents.extend(loader.load())
    return documents


def extract_text_from_file(file):
    """Load a single uploaded file with the loader matching its extension."""
    documents = []
    filename = str(file)
    if filename.endswith(".pdf"):
        loader = PyPDFLoader(file)
        documents.extend(loader.load())
    elif filename.endswith(".docx") or filename.endswith(".doc"):
        loader = Docx2txtLoader(file)
        documents.extend(loader.load())
    elif filename.endswith(".txt"):
        loader = TextLoader(file)
        documents.extend(loader.load())
    print("Text extracted")
    return documents


def get_text_chunks(text):
    """Split the loaded documents into overlapping chunks for embedding."""
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=1000)
    chunks = text_splitter.split_documents(text)
    print("Chunks split")
    return chunks


def save_in_faiss(text_chunks, save=False):
    """Embed the chunks with Google embeddings and index them in a FAISS store."""
    embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
    vector_store = FAISS.from_documents(text_chunks, embedding=embeddings)
    if save:
        vector_store.save_local("faiss_index")
    print("Document search created")
    return vector_store


def process_files(file):
    """Gradio upload callback: extract, chunk, embed, and store the document."""
    documents = extract_text_from_file(file)
    text_chunks = get_text_chunks(documents)
    vector_store = save_in_faiss(text_chunks)
    agent_chain.db = vector_store
    gr.Info("Processing completed")
    return file


def answer_query(message, history):
    """Gradio chat callback: retrieve relevant chunks and run the QA chain."""
    docs = []
    if agent_chain.db is not None:
        docs = agent_chain.db.similarity_search(message)
    response = agent_chain.agent(
        {"input_documents": docs, "human_input": message},
        return_only_outputs=True,
    )
    return response["output_text"]
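

# The two callbacks above match Gradio's file-upload and chat signatures, but this
# section does not show how the UI is assembled. Below is a minimal wiring sketch,
# assuming a simple Blocks layout; the names used here (demo, file_input) and the
# layout itself are illustrative assumptions, not part of the original app.
if __name__ == "__main__":
    with gr.Blocks() as demo:
        file_input = gr.File(label="Upload a document (.pdf, .docx, .txt)")
        # process_files serves as the upload handler; it returns the file unchanged.
        file_input.upload(process_files, inputs=file_input, outputs=file_input)
        # answer_query(message, history) fits ChatInterface's expected callback.
        gr.ChatInterface(fn=answer_query)
    demo.launch()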