File size: 4,664 Bytes
58b3e09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fecdcde
58b3e09
fecdcde
58b3e09
 
 
 
fecdcde
58b3e09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2696bd7
 
 
58b3e09
 
 
 
 
 
2696bd7
58b3e09
 
0ac60e1
58b3e09
 
 
 
 
 
 
 
2696bd7
58b3e09
 
2696bd7
58b3e09
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ef8d9f7
58b3e09
ef8d9f7
58b3e09
 
ef8d9f7
 
 
 
 
58b3e09
 
 
ef8d9f7
 
58b3e09
 
 
 
2696bd7
58b3e09
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import json
import os
import warnings
from typing import List
from operator import itemgetter
from dotenv import load_dotenv

from langfuse.callback import CallbackHandler

# Load .env early so the Langfuse credentials below can come from the
# environment instead of being hard-coded in source. load_dotenv() is
# idempotent, so the later call further down this file stays harmless.
load_dotenv()

# SECURITY: never commit API keys in source. The previously hard-coded
# Langfuse key pair must be considered leaked and rotated/revoked.
langfuse_handler = CallbackHandler(
    public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
    secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
    host=os.getenv("LANGFUSE_HOST", "https://cloud.langfuse.com"),
)

import gradio as gr

from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI

from langchain.docstore.document import Document

from langchain.prompts import PromptTemplate
from langchain.prompts import ChatPromptTemplate

from langchain.chains import RetrievalQA

from langchain_core.output_parsers import StrOutputParser, CommaSeparatedListOutputParser
from pydantic import BaseModel, Field
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableParallel

from langchain_community.vectorstores import FAISS
from langchain_community.document_transformers import EmbeddingsRedundantFilter

from langchain.retrievers import EnsembleRetriever
from langchain.retrievers.merger_retriever import MergerRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline
from langchain.retrievers import ContextualCompressionRetriever

#from langchain_cohere import  CohereEmbeddings, CohereRerank

#from langchain_groq import ChatGroq

# Pull variables from a local .env file into the process environment.
load_dotenv()

# Fail loudly when the OpenAI key is missing: the previous bare
# `os.getenv("OPENAI_API_KEY")` discarded its result and validated nothing.
if not os.getenv("OPENAI_API_KEY"):
    warnings.warn("OPENAI_API_KEY is not set; OpenAI embedding/chat calls will fail.")

# Embedding model used both to load the FAISS indexes below and at query time.
embeddings = OpenAIEmbeddings(model="text-embedding-3-large")

def load_vectorstore(index_name, embeddings, k=20):
    """Load a local FAISS index from disk and expose it as a retriever.

    Args:
        index_name: Directory/name of the saved FAISS index.
        embeddings: Embedding model matching the one used to build the index.
        k: Number of documents the retriever returns per query.

    Returns:
        A retriever over the deserialized FAISS vector store.
    """
    # allow_dangerous_deserialization is needed for pickle-backed local
    # indexes; acceptable only because the index files are produced locally.
    store = FAISS.load_local(index_name, embeddings, allow_dangerous_deserialization=True)
    return store.as_retriever(search_kwargs={"k": k})

# Names of the FAISS indexes to load; each name doubles as the on-disk
# index path handed to load_vectorstore().
retriever_names = ['large']

# name -> retriever over the corresponding FAISS index
retrievers = {}
# name -> runnable: extracts the "input" key, then queries the retriever
retrievers_docs = {}

for name in retriever_names:
    retrievers[name] = load_vectorstore(f"{name}", embeddings)
    # Loop-safe despite the lambda: the lambda has no free variables, and
    # retrievers[name] is resolved eagerly while the pipeline is built.
    # NOTE(review): retrievers_docs appears unused later (retrieve_docs
    # below rebuilds the same mapping) — confirm before removing.
    retrievers_docs[name] = (lambda x: x["input"]) | retrievers[name]


def format_docs_with_id(docs: "List[Document]") -> str:
    """Render retrieved documents as one context string for the prompt.

    Each document contributes a "Metadata: ..." line followed by a
    "Content: ..." line (plus a trailing newline); entries are separated
    by a blank line, and the whole result is prefixed with a blank line.

    Args:
        docs: Retrieved documents exposing .metadata and .page_content.

    Returns:
        The concatenated, human-readable context block.
    """
    formatted = [
        (
            f"Metadata: {doc.metadata}\n"
            f"Content: {doc.page_content}\n"
        )
        for doc in docs
    ]
    return "\n\n" + "\n\n".join(formatted)


def prompt_fn():
    """Return the system-prompt template string.

    The template ends with a `{context}` placeholder that the RAG chain
    fills with the formatted retrieved documents.
    """
    instructions = (
        "You are an expert pharmachemist, answer the question based on the context."
        " Do not answer anything not related to pharmachemistry."
    )
    return instructions + "\n\nHere is the context: {context}"

# Deterministic (temperature=0) chat model shared by every request.
llm = ChatOpenAI(temperature=0, model="gpt-4o")

# name -> runnable that pulls the question out of the {"input": ...} payload
# and feeds it to that retriever. Loop-safe: the lambda has no free
# variables, and retrievers[name] is evaluated eagerly per iteration.
retrieve_docs = {name: (lambda x: x["input"]) | retrievers[name] for name in retriever_names}

def legal(question):
    """Answer a pharmachemistry question through the RAG pipeline.

    Builds the prompt from prompt_fn(), retrieves context documents with
    the "large" retriever, asks the LLM, and traces the call via Langfuse.

    Args:
        question: The user's question, plain text.

    Returns:
        The LLM answer text (the message's .content).

    Raises:
        ValueError: If the selected retriever name has no chain.
    """
    prompt = ChatPromptTemplate.from_messages([
        ("system", prompt_fn()),
        ("human", "{input}"),
    ])

    # Format the retrieved docs into the {context} slot, then run the LLM.
    rag_chain_from_docs = (
        RunnablePassthrough.assign(context=(lambda x: format_docs_with_id(x["context"])))
        | prompt
        | llm
    )

    # One chain per configured retriever; each attaches the retrieved docs
    # as "context" and the model output as "answer".
    chains = {
        name: RunnablePassthrough.assign(context=retrieve_docs[name]).assign(answer=rag_chain_from_docs)
        for name in retriever_names
    }

    name = "large"
    if name not in chains:
        raise ValueError(f"Invalid typologie: {name}")

    result = chains[name].invoke({"input": question}, config={"callbacks": [langfuse_handler]})
    return result["answer"].content

# Minimal Gradio UI: one question box, one answer box, one submit button.
with gr.Blocks() as demo:
    with gr.Row():
        input1 = gr.Textbox(label="Question", placeholder="effets indesirables du paracetamol")

    output1 = gr.Text(label="Reponse")

    btn = gr.Button("Submit")
    # Route the question through the RAG pipeline on click.
    btn.click(legal, inputs=[input1], outputs=[output1])

demo.launch(debug=True)