Spaces:
Sleeping
Sleeping
from chainlit.types import AskFileResponse | |
import click | |
from langchain.document_loaders import TextLoader | |
from langchain.document_loaders import PyPDFDirectoryLoader | |
from langchain.vectorstores import Chroma | |
from src.config import Config | |
# import chainlit as cl | |
import logging | |
import openai | |
import os | |
from dotenv import load_dotenv | |
load_dotenv() | |
def process_file(file: AskFileResponse): | |
import tempfile | |
if file.type == "text/plain": | |
Loader = TextLoader | |
elif file.type == "application/pdf": | |
Loader = PyPDFDirectoryLoader | |
with tempfile.NamedTemporaryFile() as tempfile: | |
tempfile.write(file.content) | |
loader = Loader(tempfile.name) | |
documents = loader.load() | |
# text_splitter = text_splitter() | |
docs = Config.text_splitter.split_documents(documents) | |
for i, doc in enumerate(docs): | |
doc.metadata["source"] = f"source_{i}" | |
return docs | |
def get_docSearch(file,cl): | |
docs = process_file(file) | |
logging.info("files loaded ") | |
## save data in user session | |
cl.user_session.set("docs",docs) | |
logging.info("docs saved in active session") | |
docsearch = Chroma.from_documents(docs, Config.embeddings) | |
logging.info("embedding completed") | |
return docsearch | |
def get_source(sources,all_sources,docs,cl): | |
answer = [] | |
source_elements = [] | |
if sources: | |
found_sources = [] | |
# Add the sources to the message | |
for source in sources.split(","): | |
source_name = source.strip().replace(".", "") | |
# Get the index of the source | |
try: | |
index = all_sources.index(source_name) | |
except ValueError: | |
continue | |
text = docs[index].page_content | |
found_sources.append(source_name) | |
# Create the text element referenced in the message | |
source_elements.append(cl.Text(content=text, name=source_name)) | |
if found_sources: | |
answer += f"\nSources: {', '.join(found_sources)}" | |
else: | |
answer += "\nNo sources found" | |
return source_elements,answer |