Spaces:
Sleeping
Sleeping
File size: 2,135 Bytes
5e20c77 99a3f34 5e20c77 10330bc 99a3f34 5e20c77 10330bc 99a3f34 10330bc 5e20c77 99a3f34 10330bc 99a3f34 10330bc 99a3f34 10330bc 99a3f34 10330bc 99a3f34 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 |
from chainlit.types import AskFileResponse
import click
from langchain.document_loaders import TextLoader
from langchain.document_loaders import PyPDFDirectoryLoader
from langchain.vectorstores import Chroma
from src.config import Config
# import chainlit as cl
import logging
import openai
import os
from dotenv import load_dotenv
load_dotenv()
def process_file(file: "AskFileResponse"):
    """Load an uploaded file and split it into chunks with synthetic source ids.

    Args:
        file: Upload exposing ``type`` (a MIME type string) and ``content``
            (the raw bytes of the file).

    Returns:
        The chunks produced by ``Config.text_splitter.split_documents``, each
        with ``metadata["source"]`` overwritten to ``source_<index>``.

    Raises:
        ValueError: If ``file.type`` is neither ``text/plain`` nor
            ``application/pdf``.
    """
    import tempfile

    # Reject unknown types up front instead of failing later on an unbound
    # loader variable.
    if file.type == "text/plain":
        suffix = ".txt"
    elif file.type == "application/pdf":
        suffix = ".pdf"
    else:
        raise ValueError(f"Unsupported file type: {file.type!r}")

    with tempfile.TemporaryDirectory() as upload_dir:
        upload_path = os.path.join(upload_dir, f"upload{suffix}")
        # Write and *close* the file before loading so the loader sees the
        # complete content on disk (an open, unflushed handle may still hold
        # the bytes in its buffer).
        with open(upload_path, "wb") as upload:
            upload.write(file.content)

        if suffix == ".pdf":
            # PyPDFDirectoryLoader scans a directory for *.pdf files, so it is
            # given the directory that holds the upload, not the file itself.
            loader = PyPDFDirectoryLoader(upload_dir)
        else:
            loader = TextLoader(upload_path)
        # Load while the temporary directory still exists.
        documents = loader.load()

    docs = Config.text_splitter.split_documents(documents)
    for i, doc in enumerate(docs):
        # Stable per-chunk identifier that answers can cite.
        doc.metadata["source"] = f"source_{i}"
    return docs
def get_docSearch(file, cl):
    """Build a Chroma index from an uploaded file and remember its chunks.

    Args:
        file: Upload forwarded unchanged to ``process_file``.
        cl: Object exposing ``user_session.set(key, value)``; the chunks are
            stored there under the key ``"docs"``.

    Returns:
        The store returned by ``Chroma.from_documents`` for the chunks, using
        ``Config.embeddings``.
    """
    chunks = process_file(file)
    logging.info("files loaded ")

    # Keep the chunks in the session so later steps can look them up.
    cl.user_session.set("docs", chunks)
    logging.info("docs saved in active session")

    vector_index = Chroma.from_documents(chunks, Config.embeddings)
    logging.info("embedding completed")
    return vector_index
def get_source(sources, all_sources, docs, cl):
    """Resolve cited source names into text elements plus a summary line.

    Args:
        sources: Comma-separated source names; a falsy value means nothing
            was cited.
        all_sources: Known source names, index-aligned with ``docs``.
        docs: Sequence of objects exposing ``page_content``.
        cl: Object exposing a ``Text(content=..., name=...)`` factory.

    Returns:
        A ``(source_elements, answer)`` tuple. ``source_elements`` holds one
        ``cl.Text`` per cited name found in ``all_sources``. ``answer`` is a
        string: ``""`` when ``sources`` is falsy, otherwise either the
        ``"\\nSources: ..."`` line or ``"\\nNo sources found"``.
    """
    source_elements = []
    # The summary is text; accumulating it with ``+=`` on a list would
    # extend the list one character at a time.
    answer = ""

    if sources:
        found_sources = []
        for source in sources.split(","):
            # Names may arrive with surrounding spaces or stray periods.
            source_name = source.strip().replace(".", "")
            try:
                index = all_sources.index(source_name)
            except ValueError:
                # Cited name is not one of the known sources; skip it.
                continue
            text = docs[index].page_content
            found_sources.append(source_name)
            # Element the message can reference by name.
            source_elements.append(cl.Text(content=text, name=source_name))

        if found_sources:
            answer += f"\nSources: {', '.join(found_sources)}"
        else:
            answer += "\nNo sources found"

    return source_elements, answer