"""Streamlit chat front end for a two-stage Haystack question-answering flow.

Stage one asks an LLM for a short imaginary answer to the user's query,
embeds that answer, and uses the embedding to fetch documents from a Chroma
store. Stage two feeds the fetched documents plus the original question to a
second LLM call and shows the reply.
"""

import logging

import haystack.logging
import streamlit as st
from dotenv import load_dotenv
from haystack import Pipeline, component
from haystack.components.builders import PromptBuilder
from haystack.components.embedders import SentenceTransformersTextEmbedder
from haystack.components.generators import OpenAIGenerator
from haystack_integrations.components.retrievers.chroma import ChromaEmbeddingRetriever
from haystack_integrations.document_stores.chroma import ChromaDocumentStore

haystack.logging.configure_logging(use_json=True)
logging.basicConfig(
    format="%(levelname)s - %(name)s - %(message)s", level=logging.WARNING
)
logging.getLogger("haystack").setLevel(logging.INFO)

logger = logging.getLogger(__name__)

# Load variables from a .env file into the environment before any component
# is constructed (presumably where the OpenAI credentials come from).
load_dotenv()


@component
class ListToString:
    """Adapter component that forwards the first string of a list.

    It bridges the generator's ``replies`` output (a list of strings) to the
    text embedder's ``text`` input (a single string).
    """

    @component.output_types(text=str)
    def run(self, input_list: list[str]) -> dict[str, str]:
        """Return the first element of ``input_list`` under the key ``text``.

        Args:
            input_list: Strings to pick from; only the first one is used.

        Returns:
            A dict ``{"text": input_list[0]}``.

        Raises:
            ValueError: If ``input_list`` is empty.
        """
        if not input_list:
            # Fail with an actionable message instead of a bare IndexError.
            raise ValueError(
                "ListToString received an empty list; expected at least one string."
            )
        first = input_list[0]
        logger.debug("ListToString forwarding text: %s", first)
        return {"text": first}


@st.cache_resource
def retrieval_pipeline(path):
    """Build the pipeline that turns a query into retrieved documents.

    The pipeline renders a prompt from the query, asks the generator for an
    imaginary answer, embeds the first reply, and passes the embedding to a
    Chroma retriever. Decorated with ``st.cache_resource`` so the pipeline is
    constructed once per distinct ``path`` rather than on every script rerun.

    Args:
        path: Persistence path handed to ``ChromaDocumentStore``.

    Returns:
        The assembled ``Pipeline``. Run it with
        ``{"prompt_builder": {"query": ...}}``.
    """
    document_store = ChromaDocumentStore(persist_path=path)
    retriever = ChromaEmbeddingRetriever(document_store, top_k=5)

    # NOTE(review): "invent with ." looks like a placeholder token went
    # missing from this prompt - confirm the intended wording.
    template = (
        "Transform this query into a imaginary response that the user could "
        "expect based on your knowledge. Use 1-3 sentences. Replace entities "
        "or names that you invent with . The result should be in German. \n"
        "Query: {{ query}}"
    )
    prompt_builder = PromptBuilder(template=template)
    generator = OpenAIGenerator()
    text_embedder = SentenceTransformersTextEmbedder(
        model="intfloat/multilingual-e5-small"
    )

    pipeline = Pipeline()
    pipeline.add_component("prompt_builder", prompt_builder)
    pipeline.add_component("generator", generator)
    pipeline.add_component("list_to_string", ListToString())
    pipeline.add_component("retriever", retriever)
    pipeline.add_component("text_embedder", text_embedder)

    # prompt -> LLM reply -> first reply as text -> embedding -> retrieval
    pipeline.connect("prompt_builder", "generator")
    pipeline.connect("generator.replies", "list_to_string.input_list")
    pipeline.connect("list_to_string.text", "text_embedder.text")
    pipeline.connect("text_embedder.embedding", "retriever.query_embedding")

    return pipeline


@st.cache_resource
def generation_pipeline():
    """Build the pipeline that answers a question from supplied documents.

    Cached with ``st.cache_resource``, like ``retrieval_pipeline``, so the
    pipeline is not rebuilt on every script rerun.

    Returns:
        The assembled ``Pipeline``. Run it with
        ``{"prompt_builder": {"question": ..., "documents": ...}}``; the
        answer is available under ``["llm"]["replies"]``.
    """
    # NOTE(review): the template is a single line; confirm whether line
    # breaks between its sections were intended.
    template = (
        " Given the following information, answer the question. "
        "Context: {% for document in documents %} {{ document.content }} "
        "{% endfor %} Bleibe chronologisch. Erkläre Konzepte und Begriffe "
        "wenn nötig. Question: {{question}} Answer: "
    )
    prompt_builder = PromptBuilder(template=template)
    generator = OpenAIGenerator(model="gpt-4")

    pipeline = Pipeline()
    pipeline.add_component("prompt_builder", prompt_builder)
    pipeline.add_component("llm", generator)
    pipeline.connect("prompt_builder", "llm")

    return pipeline


retrieval_pipe = retrieval_pipeline("chatbot/chromadb")
generation_pipe = generation_pipeline()

prompt = st.chat_input("Say something")

if prompt:
    response = retrieval_pipe.run({"prompt_builder": {"query": prompt}})
    documents = response["retriever"]["documents"]

    st.markdown("### Sources")
    st.write(documents)

    answer = generation_pipe.run(
        {
            "prompt_builder": {
                "question": prompt,
                "documents": documents,
            }
        }
    )

    st.markdown("### Answer")
    st.write(answer["llm"]["replies"][0])