Spaces:

singhjagpreet
/

Document-Reader

Sleeping

App Files Files Community

Document-Reader / src /utils.py

singhjagpreet

creating embedding from docs

99a3f34 over 1 year ago

raw

history blame

2.14 kB

	from chainlit.types import AskFileResponse
	import click
	from langchain.document_loaders import TextLoader
	from langchain.document_loaders import PyPDFDirectoryLoader
	from langchain.vectorstores import Chroma


	from src.config import Config
	# import chainlit as cl
	import logging
	import openai
	import os
	from dotenv import load_dotenv

	# Read variables from a local .env file (if present) into the process
	# environment at import time; presumably this supplies the API keys used
	# by Config -- TODO confirm.
	load_dotenv()




	def process_file(file: AskFileResponse):
	    """Load an uploaded text or PDF file and split it into chunks.

	    The upload's bytes are written to a temporary file so that a path-based
	    document loader can read them; the loaded documents are then split with
	    ``Config.text_splitter``.

	    Args:
	        file: Uploaded file. Its ``type`` attribute is compared against the
	            MIME types ``text/plain`` and ``application/pdf``, and its
	            ``content`` attribute (bytes) is written to disk.

	    Returns:
	        The split documents, each with ``metadata["source"]`` set to
	        ``"source_<i>"``, where ``<i>`` is its position in the returned list.

	    Raises:
	        ValueError: If ``file.type`` is neither ``text/plain`` nor
	            ``application/pdf``.
	    """
	    import tempfile

	    if file.type == "text/plain":
	        loader_cls = TextLoader
	    elif file.type == "application/pdf":
	        # PyPDFDirectoryLoader globs a *directory* for "*.pdf" files, so
	        # handing it a single temp-file path yields no documents. The
	        # single-file loader is the one that accepts a file path.
	        from langchain.document_loaders import PyPDFLoader

	        loader_cls = PyPDFLoader
	    else:
	        # Fail with a clear message instead of an UnboundLocalError below.
	        raise ValueError(f"Unsupported file type: {file.type!r}")

	    with tempfile.NamedTemporaryFile() as tmp:
	        tmp.write(file.content)
	        # The loader reopens the file by name; flush so the buffered bytes
	        # are actually on disk before it reads them.
	        tmp.flush()
	        documents = loader_cls(tmp.name).load()

	    docs = Config.text_splitter.split_documents(documents)

	    # Give every chunk a stable, index-based source name for later lookup.
	    for i, doc in enumerate(docs):
	        doc.metadata["source"] = f"source_{i}"
	    return docs

	def get_docSearch(file,cl):
	    """Build a Chroma vector store from an uploaded file.

	    The file is split into chunks via ``process_file``, the chunks are
	    stored in the user session under the key ``"docs"``, and they are then
	    embedded with ``Config.embeddings``.

	    Args:
	        file: Uploaded file, passed unchanged to ``process_file``.
	        cl: Object exposing ``user_session.set(key, value)``.

	    Returns:
	        The store returned by ``Chroma.from_documents``.
	    """
	    chunks = process_file(file)
	    logging.info("files loaded ")

	    # Keep the chunks in the session so they can be retrieved later.
	    cl.user_session.set("docs", chunks)
	    logging.info("docs saved in active session")

	    vector_store = Chroma.from_documents(chunks, Config.embeddings)
	    logging.info("embedding completed")
	    return vector_store

	def get_source(sources,all_sources,docs,cl):
	    """Resolve cited source names into text elements and a summary line.

	    Args:
	        sources: Comma-separated source names; periods are removed and
	            surrounding whitespace is stripped from each name. May be empty.
	        all_sources: Sequence of known source names; the position of a name
	            in it is used as the index into ``docs``.
	        docs: Sequence of objects with a ``page_content`` attribute.
	        cl: Object exposing ``Text(content=..., name=...)``.

	    Returns:
	        A ``(source_elements, answer)`` tuple. ``source_elements`` is the
	        list of ``cl.Text`` objects for the names that were found.
	        ``answer`` is a string: ``""`` when ``sources`` is empty, otherwise
	        ``"\\nSources: a, b"`` or ``"\\nNo sources found"``.
	    """
	    # Must be a str: with a list, ``answer += "text"`` would extend it one
	    # character at a time and return a list of characters.
	    answer = ""
	    source_elements = []
	    if not sources:
	        return source_elements, answer

	    found_sources = []
	    for source in sources.split(","):
	        source_name = source.strip().replace(".", "")
	        # Skip any cited name that is not a known source.
	        try:
	            index = all_sources.index(source_name)
	        except ValueError:
	            continue
	        found_sources.append(source_name)
	        # Create the text element referenced in the message.
	        source_elements.append(
	            cl.Text(content=docs[index].page_content, name=source_name)
	        )

	    if found_sources:
	        answer += f"\nSources: {', '.join(found_sources)}"
	    else:
	        answer += "\nNo sources found"
	    return source_elements, answer