Stéphanie Kamgnia Wonkap committed
Commit 58e5d73 • Parent(s): 546fe9e

changing to nvidia nim

Files changed:
- app.py (+64, -52)
- src/generator.py (+28, -34)
app.py
CHANGED
@@ -2,12 +2,14 @@
 import streamlit as st
 import os
 import yaml
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
 from dotenv import load_dotenv
 import torch
 from src.generator import answer_with_rag
 from ragatouille import RAGPretrainedModel
 from src.data_preparation import split_documents
 from src.embeddings import init_embedding_model
+from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings
 
 from transformers import pipeline
 from langchain_community.document_loaders import PyPDFLoader
@@ -28,6 +30,7 @@ def load_config():
     return cfg
 
 cfg= load_config()
+#os.environ['NVIDIA_API_KEY']=st.secrets("NVIDIA_API_KEY")
 #load_dotenv("./src/.env")
 #HF_TOKEN=os.environ.get["HF_TOKEN"]
 #st.write(os.environ["HF_TOKEN"] == st.secrets["HF_TOKEN"])
@@ -42,67 +45,76 @@ def main():
     st.title("Un RAG pour interroger le Collège de Pédiatrie 2024")
     user_query = st.text_input("Entrez votre question:")
 
-    if os.path.exists(VECTORDB_PATH):
-    else:
-        KNOWLEDGE_VECTOR_DATABASE=init_vectorDB_from_doc(docs_processed, embedding_model)
+    if KNOWLEDGE_VECTOR_DATABASE not in st.session_state:
+        # Initialize the retriever and LLM
+        st.session_state.loader = PyPDFLoader(DATA_FILE_PATH)
+        #loader = PyPDFDirectoryLoader(DATA_FILE_PATH)
+        st.session_state.raw_document_base = st.session_state.loader.load()
+        st.session_state.MARKDOWN_SEPARATORS = [
+            "\n#{1,6} ",
+            "```\n",
+            "\n\\*\\*\\*+\n",
+            "\n---+\n",
+            "\n___+\n",
+            "\n\n",
+            "\n",
+            " ",
+            "",]
+        st.session_state.docs_processed = split_documents(
+            512,  # We choose a chunk size adapted to our model
+            st.session_state.raw_document_base,
+            #tokenizer_name=EMBEDDING_MODEL_NAME,
+            separator=st.session_state.MARKDOWN_SEPARATORS
+        )
+        st.session_state.embedding_model=NVIDIAEmbeddings()
+        st.session_state.KNOWLEDGE_VECTOR_DATABASE= init_vectorDB_from_doc(st.session_state.docs_processed,
+                                                                           st.session_state.embedding_model)
 
+        #if os.path.exists(VECTORDB_PATH):
+        #    KNOWLEDGE_VECTOR_DATABASE = FAISS.load_local(
+        #        VECTORDB_PATH, embedding_model,
+        #        allow_dangerous_deserialization=True)
+        #else:
+        #    KNOWLEDGE_VECTOR_DATABASE=init_vectorDB_from_doc(docs_processed, embedding_model)
+        #    KNOWLEDGE_VECTOR_DATABASE.save_local(VECTORDB_PATH)
 
     if st.button("Get Answer"):
         # Get the answer and relevant documents
-        bnb_config = BitsAndBytesConfig(
+        #bnb_config = BitsAndBytesConfig(
         #load_in_8bit=True
-        )
-            device_map = 'auto')
-        tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
+        # load_in_4bit=True,
+        # bnb_4bit_use_double_quant=True,
+        # bnb_4bit_quant_type="nf4",
+        # bnb_4bit_compute_dtype=torch.bfloat16,
+        #)
 
-            repetition_penalty=1.1,
-            return_full_text=False,
-            max_new_tokens=500,
-            token = os.getenv("HF_TOKEN")
+        llm = ChatNVIDIA(
+            model=READER_MODEL_NAME,
+            api_key= os.get("NVIDIA_API_KEY"),
+            temperature=0.2,
+            top_p=0.7,
+            max_tokens=1024,
         )
+        #tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
+
+        #READER_LLM = pipeline(
+        #    model=model,
+        #    tokenizer=tokenizer,
+        #    task="text-generation",
+        #    do_sample=True,
+        #    temperature=0.2,
+        #    repetition_penalty=1.1,
+        #    return_full_text=False,
+        #    max_new_tokens=500,
+        #    token = os.getenv("HF_TOKEN")
+        #    )
+        # RERANKER = RAGPretrainedModel.from_pretrained(RERANKER_MODEL_NAME)
+        # num_doc_before_rerank=15
+        # num_final_releveant_docs=5
+        # answer, relevant_docs = answer_with_rag(query=user_query, READER_MODEL_NAME=READER_MODEL_NAME,embedding_model=embedding_model,vectorDB=KNOWLEDGE_VECTOR_DATABASE,reranker=RERANKER, llm=READER_LLM,num_doc_before_rerank=num_doc_before_rerank,num_final_relevant_docs=num_final_releveant_docs,rerank=True)
         #print(answer)
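In the new app.py the vector store is built once and cached in st.session_state, but the guard `if KNOWLEDGE_VECTOR_DATABASE not in st.session_state:` tests a name that is never defined in the new version, so it would raise NameError on the first run; the usual Streamlit pattern tests the string key. Below is a minimal sketch of that guard, assuming the repo helpers (split_documents, init_vectorDB_from_doc) keep the signatures used in the diff; DATA_FILE_PATH and the shortened separator list are placeholders, since the real values come from the app's config.

    import streamlit as st
    from langchain_community.document_loaders import PyPDFLoader
    from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings

    from src.data_preparation import split_documents    # repo helper, called as in the diff
    from src.retriever import init_vectorDB_from_doc    # repo helper that builds the FAISS index

    DATA_FILE_PATH = "data/college_pediatrie.pdf"        # placeholder; the app reads the real path from its config

    if "KNOWLEDGE_VECTOR_DATABASE" not in st.session_state:   # test the key, not an undefined variable
        raw_document_base = PyPDFLoader(DATA_FILE_PATH).load()
        docs_processed = split_documents(
            512,                                         # chunk size adapted to the embedding model
            raw_document_base,
            separator=["\n\n", "\n", " ", ""],
        )
        st.session_state.embedding_model = NVIDIAEmbeddings()
        st.session_state.KNOWLEDGE_VECTOR_DATABASE = init_vectorDB_from_doc(
            docs_processed, st.session_state.embedding_model
        )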
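The reader model now goes through the NVIDIA NIM chat endpoint instead of a local transformers pipeline. Note that `api_key= os.get("NVIDIA_API_KEY")` in the committed hunk calls a function that does not exist on the os module; os.getenv (or os.environ.get) is the standard lookup. A short sketch of the same call with that fixed, using a hypothetical model id in place of the READER_MODEL_NAME value taken from the config:

    import os
    from langchain_nvidia_ai_endpoints import ChatNVIDIA

    READER_MODEL_NAME = "meta/llama3-8b-instruct"   # hypothetical NIM model id; the app reads it from its config

    llm = ChatNVIDIA(
        model=READER_MODEL_NAME,
        api_key=os.getenv("NVIDIA_API_KEY"),        # read the API key from the environment
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
    )
    # e.g. print(llm.invoke("Qu'est-ce qu'une bronchiolite ?").content)

The commented-out st.secrets("NVIDIA_API_KEY") line has a similar issue: st.secrets is a mapping, so it is indexed with square brackets rather than called.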
src/generator.py
CHANGED
@@ -2,53 +2,47 @@
 from src.retriever import init_vectorDB_from_doc, retriever
 
 from transformers import AutoTokenizer, pipeline
+from langchain_core.prompts import ChatPromptTemplate
 from typing import List,Optional, Tuple # import the Tuple type
 from langchain.docstore.document import Document as LangchainDocument
 from langchain_community.vectorstores import FAISS
-    give a comprehensive answer to the question.
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains import create_retrieval_chain
+def promt_template():
+    prompt_in_chat_format =
+    """
+    Using the information contained in the given context, give a comprehensive answer to the question.
 Respond only to the question asked, response should be concise and relevant to the question.
-    Provide the number of the source document when relevant.If the
-    "
-    ]
-    tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
-    RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
-        prompt_in_chat_format, tokenize=False, add_generation_prompt=True)
-    return RAG_PROMPT_TEMPLATE
+    Provide the number of the source document when relevant.If the answer cannot be deduced from the context, do not give an answer. Please answer in french,
+
+    \n\n
+    {context} """
+    prompt = ChatPromptTemplate.from_template(
+        [
+            ("system",prompt_in_chat_format),
+            ("human", "{query}")
+        ])
+    #RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
+    #    prompt_in_chat_format, tokenize=False, add_generation_prompt=True)
+    return prompt
 
 def answer_with_rag(
-    query: str,
-    reranker,llm: pipeline, num_doc_before_rerank: int = 5,
-    num_final_relevant_docs: int = 5,
-    rerank: bool = True
+    query: str, retriever,llm
 
 ) -> Tuple[str, List[LangchainDocument]]:
     # Build the final prompt
-    relevant_docs= retriever(query,vectorDB,reranker,num_doc_before_rerank,num_final_relevant_docs,rerank)
-    context = "\nExtracted documents:\n"
-    context += "".join([f"Document {str(i)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
+    #relevant_docs= retriever(query,vectorDB,reranker,num_doc_before_rerank,num_final_relevant_docs,rerank)
+    #context = "\nExtracted documents:\n"
+    #context += "".join([f"Document {str(i)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
     #print("=> Context:")
     #print(context)
-    RAG_PROMPT_TEMPLATE = promt_template(
+    RAG_PROMPT_TEMPLATE = promt_template()
+    document_chain = create_stuff_documents_chain(llm, RAG_PROMPT_TEMPLATE)
+    retrieval_chain=create_retrieval_chain(retriever,document_chain)
     print("=> Final prompt:")
     #print(final_prompt)
     # Redact an answer
     print("=> Generating answer...")
+    response=retrieval_chain.invoke({'query':query})
 
-    return answer,
+    return response['answer'], response["context"]
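In src/generator.py, promt_template() now builds a LangChain chat prompt, but as committed it would not parse: `prompt_in_chat_format =` is left dangling on its own line, and ChatPromptTemplate.from_template() takes a single string, while a list of (role, message) tuples goes through from_messages(). A sketch of the same function in a form that parses, assuming the standard create_retrieval_chain contract where the question arrives under the "input" key and the retrieved documents under "context":

    from langchain_core.prompts import ChatPromptTemplate

    def promt_template() -> ChatPromptTemplate:
        system_message = (
            "Using the information contained in the given context, give a comprehensive "
            "answer to the question. Respond only to the question asked, the response "
            "should be concise and relevant to the question. Provide the number of the "
            "source document when relevant. If the answer cannot be deduced from the "
            "context, do not give an answer. Please answer in French.\n\n{context}"
        )
        # from_messages takes (role, template) pairs; from_template only accepts one string
        return ChatPromptTemplate.from_messages([
            ("system", system_message),
            ("human", "{input}"),
        ])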
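The rewritten answer_with_rag() delegates retrieval and document stuffing to create_retrieval_chain and create_stuff_documents_chain. Two details matter when wiring it up: with a standard retriever, the chain returned by create_retrieval_chain reads the question from the "input" key (so the committed invoke({'query': query}) would fail with a KeyError), and the retriever argument is typically a LangChain retriever such as the one returned by the vector store's as_retriever(), not the repo's retriever helper function imported at the top of the module. A sketch under those assumptions:

    from typing import List, Tuple

    from langchain.chains import create_retrieval_chain
    from langchain.chains.combine_documents import create_stuff_documents_chain
    from langchain.docstore.document import Document as LangchainDocument

    def answer_with_rag(query: str, retriever, llm) -> Tuple[str, List[LangchainDocument]]:
        prompt = promt_template()                                   # chat prompt sketched above
        document_chain = create_stuff_documents_chain(llm, prompt)  # stuffs retrieved docs into {context}
        retrieval_chain = create_retrieval_chain(retriever, document_chain)
        response = retrieval_chain.invoke({"input": query})         # the chain reads the question from "input"
        return response["answer"], response["context"]

    # Typical call from app.py, with the vector store cached in st.session_state:
    # answer, sources = answer_with_rag(
    #     user_query,
    #     st.session_state.KNOWLEDGE_VECTOR_DATABASE.as_retriever(search_kwargs={"k": 5}),
    #     llm,
    # )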