import os
import tempfile
from typing import List

import chainlit as cl
from chainlit.cli import run_chainlit
from chainlit.types import AskFileResponse
from langchain_text_splitters import RecursiveCharacterTextSplitter
from openai import AsyncOpenAI

from aimakerspace.openai_utils.chatmodel import ChatOpenAI
from aimakerspace.openai_utils.embedding import EmbeddingModel
from aimakerspace.openai_utils.prompts import (
    AssistantRolePrompt,
    SystemRolePrompt,
    UserRolePrompt,
)
from aimakerspace.text_utils import CharacterTextSplitter, PdfFileLoader, TextFileLoader
from aimakerspace.vectordatabase import VectorDatabase, VectorDatabaseOptions
from RagPipeline import RetrievalAugmentedQAPipeline
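
# NOTE (assumption): the OpenAI-backed components below (EmbeddingModel,
# ChatOpenAI) are expected to pick up OPENAI_API_KEY from the environment;
# nothing in this file sets the key explicitly.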
# Instrument the OpenAI client
# cl.instrument_openai()

##### Prompt Templates #####
system_template = """\
Use the following context to answer a user's question. If you cannot find the answer in the context, say you don't know the answer."""

user_prompt_template = """\
Context:
{context}

Question:
{question}
"""

system_role_prompt = SystemRolePrompt(system_template)
user_role_prompt = UserRolePrompt(user_prompt_template)
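
# The {context} and {question} placeholders are filled at query time; the
# RetrievalAugmentedQAPipeline is assumed to format user_role_prompt with the
# retrieved chunks and the incoming user message.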

### Text Chunking ###
# text_splitter = CharacterTextSplitter()
text_splitter = RecursiveCharacterTextSplitter(
    separators=[
        "\n\n",
        "\n",
        " ",
        ".",
        ",",
        "\u200b",  # Zero-width space
        "\uff0c",  # Fullwidth comma
        "\u3001",  # Ideographic comma
        "\uff0e",  # Fullwidth full stop
        "\u3002",  # Ideographic full stop
        "",
    ],
)
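
# The splitter falls back through the separators above in order and otherwise
# uses LangChain's default chunk sizing. To tune granularity explicitly, it
# could be configured like this (illustrative values, not tuned for this app):
#
# text_splitter = RecursiveCharacterTextSplitter(
#     separators=["\n\n", "\n", " ", ""],
#     chunk_size=1000,
#     chunk_overlap=100,
# )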


def process_text_file(file: AskFileResponse) -> List[str]:
    """Write the uploaded text file to a temp file, load it, and split it into chunks."""
    with tempfile.NamedTemporaryFile(
        mode="wb", delete=False, suffix=".txt"
    ) as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(file.content)

    text_loader = TextFileLoader(temp_file_path)
    documents = text_loader.load_documents()
    texts = []
    for doc in documents:
        texts += text_splitter.split_text(doc)
    return texts
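
# NOTE: delete=False leaves the temp files on disk after loading. If cleanup
# is desired, something like os.unlink(temp_file_path) could be called once
# the loader has read the file (applies to process_pdf_file below as well).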


def process_pdf_file(file: AskFileResponse) -> List[str]:
    """Write the uploaded PDF to a temp file and load it with PdfFileLoader."""
    with tempfile.NamedTemporaryFile(
        mode="wb", delete=False, suffix=".pdf"
    ) as temp_file:
        temp_file_path = temp_file.name
        temp_file.write(file.content)

    pdf_loader = PdfFileLoader(temp_file_path)
    texts = pdf_loader.load_documents()  # Also handles splitting the text, in this case into pages
    return texts


async def send_new_message(content, elements=None):
    msg = cl.Message(content, elements=elements)
    await msg.send()
    return msg


@cl.on_chat_start
async def on_chat_start():
    print("On Chat Start")
    # await send_new_message("Welcome to the Chat with Files app!")
    msg = cl.Message(content="Welcome to the Chat with Files app!")
    await msg.send()
    print("After first message")

    files = None
    # Wait for the user to upload at least one file
    while files is None:
        files = await cl.AskFileMessage(
            content="Please upload a text or PDF file to begin!",
            accept=["text/plain", "application/pdf"],
            max_size_mb=10,
            max_files=4,
            timeout=180,
        ).send()

    texts: List[str] = []
    for file in files:
        if file.type == "application/pdf":
            texts += process_pdf_file(file)
        elif file.type == "text/plain":
            texts += process_text_file(file)
        # await send_new_message(content=f"Processing `{file.name}`...")
        msg = cl.Message(content=f"Processing `{file.name}`...")
        await msg.send()
    print(f"Processing {len(texts)} text chunks")

    # Build the vector store from the text chunks
    vector_db_options = VectorDatabaseOptions.QDRANT
    # text-embedding-3-small natively returns 1536-dim vectors; dimensions=1000
    # asks the API for shortened embeddings
    embedding_model = EmbeddingModel(embeddings_model_name="text-embedding-3-small", dimensions=1000)
    vector_db = VectorDatabase(vector_db_options, embedding_model)
    vector_db = await vector_db.abuild_from_list(texts)

    chat_openai = ChatOpenAI()

    # Create the retrieval-augmented QA chain
    retrieval_augmented_qa_pipeline = RetrievalAugmentedQAPipeline(
        system_role_prompt,
        user_role_prompt,
        vector_db_retriever=vector_db,
        llm=chat_openai,
    )

    # Let the user know that the system is ready
    msg = cl.Message(content=f"Processed {len(files)} file(s). You can now ask questions!")
    await msg.send()

    cl.user_session.set("chain", retrieval_augmented_qa_pipeline)
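
# cl.user_session is scoped to a single chat session in Chainlit, so each
# connected user gets their own pipeline built from the files they uploaded.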


@cl.on_message
async def main(message: cl.Message):
    print("on message")
    chain: RetrievalAugmentedQAPipeline = cl.user_session.get("chain")

    msg = cl.Message(content="")
    result = await chain.arun_pipeline(message.content)
    # Stream the response tokens into the empty message as they arrive
    async for stream_resp in result.get("response"):
        await msg.stream_token(stream_resp)
    await msg.send()

    cl.user_session.set("chain", chain)


if __name__ == "__main__":
    run_chainlit(__file__)
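
# Running `python app.py` starts Chainlit programmatically via run_chainlit;
# the equivalent CLI invocation (assuming this file is saved as app.py) is:
#   chainlit run app.py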