Stéphanie Kamgnia Wonkap committed on
Commit 58e5d73
1 Parent(s): 546fe9e

changing to nvidia nim

Files changed (2):
  1. app.py +64 -52
  2. src/generator.py +28 -34
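
Both files below read an NVIDIA API key for the langchain_nvidia_ai_endpoints clients (via st.secrets or the NVIDIA_API_KEY environment variable). A minimal sketch of providing it when running locally, assuming the standard NVIDIA_API_KEY variable; the "nvapi-..." value is a placeholder, not from this commit:

import os
os.environ["NVIDIA_API_KEY"] = "nvapi-..."  # placeholder key; the Space would read it from st.secrets instead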
app.py CHANGED
@@ -2,12 +2,14 @@
 import streamlit as st
 import os
 import yaml
+from langchain_nvidia_ai_endpoints import ChatNVIDIA
 from dotenv import load_dotenv
 import torch
 from src.generator import answer_with_rag
 from ragatouille import RAGPretrainedModel
 from src.data_preparation import split_documents
 from src.embeddings import init_embedding_model
+from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings

 from transformers import pipeline
 from langchain_community.document_loaders import PyPDFLoader
@@ -28,6 +30,7 @@ def load_config():
     return cfg

 cfg= load_config()
+#os.environ['NVIDIA_API_KEY'] = st.secrets["NVIDIA_API_KEY"]
 #load_dotenv("./src/.env")
 #HF_TOKEN=os.environ.get["HF_TOKEN"]
 #st.write(os.environ["HF_TOKEN"] == st.secrets["HF_TOKEN"])
@@ -42,67 +45,76 @@ def main():
     st.title("Un RAG pour interroger le Collège de Pédiatrie 2024")
     user_query = st.text_input("Entrez votre question:")

-
-    # Initialize the retriever and LLM
+    if "KNOWLEDGE_VECTOR_DATABASE" not in st.session_state:
+        # Initialize the retriever and LLM

-    loader = PyPDFLoader(DATA_FILE_PATH)
-    #loader = PyPDFDirectoryLoader(DATA_FILE_PATH)
-    raw_document_base = loader.load()
-    MARKDOWN_SEPARATORS = [
-        "\n#{1,6} ",
-        "```\n",
-        "\n\\*\\*\\*+\n",
-        "\n---+\n",
-        "\n___+\n",
-        "\n\n",
-        "\n",
-        " ",
-        "",]
-    docs_processed = split_documents(
-        512,  # We choose a chunk size adapted to our model
-        raw_document_base,
-        tokenizer_name=EMBEDDING_MODEL_NAME,
-        separator=MARKDOWN_SEPARATORS
-    )
-    embedding_model = init_embedding_model(EMBEDDING_MODEL_NAME)
+        st.session_state.loader = PyPDFLoader(DATA_FILE_PATH)
+        #loader = PyPDFDirectoryLoader(DATA_FILE_PATH)
+        st.session_state.raw_document_base = st.session_state.loader.load()
+        st.session_state.MARKDOWN_SEPARATORS = [
+            "\n#{1,6} ",
+            "```\n",
+            "\n\\*\\*\\*+\n",
+            "\n---+\n",
+            "\n___+\n",
+            "\n\n",
+            "\n",
+            " ",
+            "",]
+        st.session_state.docs_processed = split_documents(
+            512,  # We choose a chunk size adapted to our model
+            st.session_state.raw_document_base,
+            #tokenizer_name=EMBEDDING_MODEL_NAME,
+            separator=st.session_state.MARKDOWN_SEPARATORS
+        )
+        st.session_state.embedding_model = NVIDIAEmbeddings()
+        st.session_state.KNOWLEDGE_VECTOR_DATABASE = init_vectorDB_from_doc(st.session_state.docs_processed,
+                                                                            st.session_state.embedding_model)

-    if os.path.exists(VECTORDB_PATH):
-        KNOWLEDGE_VECTOR_DATABASE = FAISS.load_local(
-            VECTORDB_PATH, embedding_model,
-            allow_dangerous_deserialization=True)
-    else:
-        KNOWLEDGE_VECTOR_DATABASE = init_vectorDB_from_doc(docs_processed, embedding_model)
-        KNOWLEDGE_VECTOR_DATABASE.save_local(VECTORDB_PATH)
+        #if os.path.exists(VECTORDB_PATH):
+        #    KNOWLEDGE_VECTOR_DATABASE = FAISS.load_local(
+        #        VECTORDB_PATH, embedding_model,
+        #        allow_dangerous_deserialization=True)
+        #else:
+        #    KNOWLEDGE_VECTOR_DATABASE = init_vectorDB_from_doc(docs_processed, embedding_model)
+        #    KNOWLEDGE_VECTOR_DATABASE.save_local(VECTORDB_PATH)


     if st.button("Get Answer"):
         # Get the answer and relevant documents
-        bnb_config = BitsAndBytesConfig(
+        #bnb_config = BitsAndBytesConfig(
             #load_in_8bit=True
-            load_in_4bit=True,
-            bnb_4bit_use_double_quant=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16,
-        )
-        model = AutoModelForCausalLM.from_pretrained(READER_MODEL_NAME, quantization_config=bnb_config,
-                                                     device_map = 'auto')
-        tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
+        #    load_in_4bit=True,
+        #    bnb_4bit_use_double_quant=True,
+        #    bnb_4bit_quant_type="nf4",
+        #    bnb_4bit_compute_dtype=torch.bfloat16,
+        #)
+

-        READER_LLM = pipeline(
-            model=model,
-            tokenizer=tokenizer,
-            task="text-generation",
-            do_sample=True,
-            temperature=0.2,
-            repetition_penalty=1.1,
-            return_full_text=False,
-            max_new_tokens=500,
-            token = os.getenv("HF_TOKEN")
+        llm = ChatNVIDIA(
+            model=READER_MODEL_NAME,
+            api_key=os.getenv("NVIDIA_API_KEY"),
+            temperature=0.2,
+            top_p=0.7,
+            max_tokens=1024,
         )
-        RERANKER = RAGPretrainedModel.from_pretrained(RERANKER_MODEL_NAME)
-        num_doc_before_rerank=15
-        num_final_releveant_docs=5
-        answer, relevant_docs = answer_with_rag(query=user_query, READER_MODEL_NAME=READER_MODEL_NAME,embedding_model=embedding_model,vectorDB=KNOWLEDGE_VECTOR_DATABASE,reranker=RERANKER, llm=READER_LLM,num_doc_before_rerank=num_doc_before_rerank,num_final_relevant_docs=num_final_releveant_docs,rerank=True)
+        #tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
+
+        #READER_LLM = pipeline(
+        #    model=model,
+        #    tokenizer=tokenizer,
+        #    task="text-generation",
+        #    do_sample=True,
+        #    temperature=0.2,
+        #    repetition_penalty=1.1,
+        #    return_full_text=False,
+        #    max_new_tokens=500,
+        #    token = os.getenv("HF_TOKEN")
+        #)
+        #RERANKER = RAGPretrainedModel.from_pretrained(RERANKER_MODEL_NAME)
+        #num_doc_before_rerank=15
+        #num_final_releveant_docs=5
+        #answer, relevant_docs = answer_with_rag(query=user_query, READER_MODEL_NAME=READER_MODEL_NAME,embedding_model=embedding_model,vectorDB=KNOWLEDGE_VECTOR_DATABASE,reranker=RERANKER, llm=READER_LLM,num_doc_before_rerank=num_doc_before_rerank,num_final_relevant_docs=num_final_releveant_docs,rerank=True)
         #print(answer)

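The commit comments out the old answer_with_rag call in app.py without showing its replacement. A minimal sketch (not part of this commit) of how the "Get Answer" branch could be wired up, assuming READER_MODEL_NAME and user_query from earlier in main(), the FAISS index kept in st.session_state, and the new answer_with_rag(query, retriever, llm) signature from src/generator.py; the top-k value is an assumption:

import os
import streamlit as st
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from src.generator import answer_with_rag

if st.button("Get Answer"):
    llm = ChatNVIDIA(
        model=READER_MODEL_NAME,              # reader model name from the YAML config
        api_key=os.getenv("NVIDIA_API_KEY"),  # NVIDIA NIM API key
        temperature=0.2,
        top_p=0.7,
        max_tokens=1024,
    )
    # Expose the FAISS index built above as a LangChain retriever.
    retriever = st.session_state.KNOWLEDGE_VECTOR_DATABASE.as_retriever(
        search_kwargs={"k": 5}                # assumed top-k, not taken from the commit
    )
    answer, relevant_docs = answer_with_rag(user_query, retriever, llm)
    st.write(answer)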
src/generator.py CHANGED
@@ -2,53 +2,47 @@
 from src.retriever import init_vectorDB_from_doc, retriever

 from transformers import AutoTokenizer, pipeline
+from langchain_core.prompts import ChatPromptTemplate
 from typing import List,Optional, Tuple # import the Tuple type
 from langchain.docstore.document import Document as LangchainDocument
 from langchain_community.vectorstores import FAISS
-def promt_template(query: str,READER_MODEL_NAME:str,context:str):
-    prompt_in_chat_format = [
-        {
-            "role": "system",
-            "content": """Using the information contained in the context,
-give a comprehensive answer to the question.
 Respond only to the question asked, response should be concise and relevant to the question.
-Provide the number of the source document when relevant.If the nswer cannot be deduced from the context, do not give an answer. Please answer in french""",
-        },
-        {
-            "role": "user",
-            "content": """Context:
-{context}
-
----
-Now here is the question you need to answer.
-
-Question: {query}""",
-        },
-    ]
-    tokenizer = AutoTokenizer.from_pretrained(READER_MODEL_NAME)
-    RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
-        prompt_in_chat_format, tokenize=False, add_generation_prompt=True)
-    return RAG_PROMPT_TEMPLATE
+from langchain.chains.combine_documents import create_stuff_documents_chain
+from langchain.chains import create_retrieval_chain
+def promt_template():
+    prompt_in_chat_format = """
+Using the information contained in the given context, give a comprehensive answer to the question.
+Provide the number of the source document when relevant. If the answer cannot be deduced from the context, do not give an answer. Please answer in French.
+
+\n\n
+{context} """
+    prompt = ChatPromptTemplate.from_messages(
+        [
+            ("system", prompt_in_chat_format),
+            ("human", "{input}")
+        ])
+    #RAG_PROMPT_TEMPLATE = tokenizer.apply_chat_template(
+    #    prompt_in_chat_format, tokenize=False, add_generation_prompt=True)
+    return prompt

 def answer_with_rag(
-    query: str,embedding_model, vectorDB: FAISS,READER_MODEL_NAME:str,
-    reranker,llm: pipeline, num_doc_before_rerank: int = 5,
-    num_final_relevant_docs: int = 5,
-    rerank: bool = True
+    query: str, retriever, llm

 ) -> Tuple[str, List[LangchainDocument]]:
     # Build the final prompt
-    relevant_docs= retriever(query,vectorDB,reranker,num_doc_before_rerank,num_final_relevant_docs,rerank)
-    context = "\nExtracted documents:\n"
-    context += "".join([f"Document {str(i)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
+    #relevant_docs= retriever(query,vectorDB,reranker,num_doc_before_rerank,num_final_relevant_docs,rerank)
+    #context = "\nExtracted documents:\n"
+    #context += "".join([f"Document {str(i)}:::\n" + doc for i, doc in enumerate(relevant_docs)])
     #print("=> Context:")
     #print(context)
-    RAG_PROMPT_TEMPLATE = promt_template(query,READER_MODEL_NAME,context)
-    final_prompt =RAG_PROMPT_TEMPLATE.format(query=query, context=context,READER_MODEL_NAME=READER_MODEL_NAME)
+    RAG_PROMPT_TEMPLATE = promt_template()
+    document_chain = create_stuff_documents_chain(llm, RAG_PROMPT_TEMPLATE)
+    retrieval_chain = create_retrieval_chain(retriever, document_chain)
     print("=> Final prompt:")
     #print(final_prompt)
     # Redact an answer
     print("=> Generating answer...")
-    answer = llm(final_prompt)[0]["generated_text"]
+    response = retrieval_chain.invoke({"input": query})

-    return answer, relevant_docs
+    return response["answer"], response["context"]
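
For reference, create_retrieval_chain expects the question under the "input" key and returns the generated text under "answer" and the retrieved documents under "context", which is why the rewritten answer_with_rag invokes it with {"input": query}. A standalone usage sketch (not from this commit), assuming a tiny in-memory FAISS index, the "meta/llama3-70b-instruct" model id, and an NVIDIA_API_KEY available in the environment:

from langchain_community.vectorstores import FAISS
from langchain_nvidia_ai_endpoints import ChatNVIDIA
from langchain_nvidia_ai_endpoints.embeddings import NVIDIAEmbeddings
from src.generator import answer_with_rag

# Throwaway index just to exercise the chain; the app builds its index from the PDF instead.
db = FAISS.from_texts(["La bronchiolite est une infection virale du nourrisson."], NVIDIAEmbeddings())
retriever = db.as_retriever(search_kwargs={"k": 1})
llm = ChatNVIDIA(model="meta/llama3-70b-instruct")  # assumed NIM model id, needs NVIDIA_API_KEY

answer, docs = answer_with_rag("Qu'est-ce que la bronchiolite ?", retriever, llm)
print(answer)                               # text returned under the chain's "answer" key
print(len(docs), "document(s) retrieved")   # documents returned under "context"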