jhonparra18 committed on
Commit
f493920
1 Parent(s): de3e20a

updated file structure

Files changed (7)
  1. README.md +0 -1
  2. app.py +28 -0
  3. common.py +79 -0
  4. config.py +13 -0
  5. inference_hf.py +47 -0
  6. preprocessing.py +35 -0
  7. requirements.txt +6 -0
README.md CHANGED
@@ -10,4 +10,3 @@ pinned: false
 license: apache-2.0
 ---

-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,28 @@
+import gradio as gr
+import random
+import time
+
+from common import DATA
+from config import DEFAULT_BOT_MESSAGE
+from inference_hf import rag_chatbot
+
+
+DATA = DATA.add_faiss_index("embedding")
+
+
+async def predict(message, chat_history):
+    bot_message = rag_chatbot(message, k=3)
+    chat_history.append((message, bot_message))
+    return "", chat_history
+
+
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    chatbot = gr.Chatbot(
+        value=[[None, DEFAULT_BOT_MESSAGE]], label="ReformaPensional-Llama3"
+    )
+    msg = gr.Textbox(placeholder="Haz aquí tu pregunta")
+    clear = gr.ClearButton([msg, chatbot])
+    msg.submit(predict, [msg, chatbot], [msg, chatbot])
+
+if __name__ == "__main__":
+    demo.launch()
common.py ADDED
@@ -0,0 +1,79 @@
+import torch
+from datasets import Dataset as hfd
+from datasets import load_dataset
+from sentence_transformers import SentenceTransformer
+from transformers import (
+    AutoModelForCausalLM,
+    AutoTokenizer,
+    BitsAndBytesConfig,
+    pipeline,
+)
+
+from config import DATASET_HF_NAME, LLAMA3_CHECKPOINT
+
+# Adapted from HF https://huggingface.co/blog/not-lain/rag-chatbot-using-llama3
+
+
+def search_topk(
+    data: hfd,
+    feature_extractor: SentenceTransformer,
+    query: str,
+    k: int = 3,
+    embedding_col: str = "embedding",
+):
+    """Embeds a new query and returns the most similar examples from the dataset."""
+    embedded_query = feature_extractor.encode(query)  # embed the new query
+    scores, retrieved_examples = data.get_nearest_examples(  # retrieve results
+        embedding_col,
+        embedded_query,  # compare the embedded query with the dataset embeddings
+        k=k,  # keep only the top k results
+    )
+    return scores, retrieved_examples
+
+
+def format_prompt(
+    prompt: str, retrieved_documents: hfd, k: int, text_col: str = "chunk"
+):
+    """Builds the generation prompt from the question and the retrieved documents."""
+    PROMPT = f"Question:{prompt}\nContext:"
+    for idx in range(k):
+        PROMPT += f"{retrieved_documents[text_col][idx]}\n"
+    return PROMPT
+
+
+# Quantization config
+bnb_config = BitsAndBytesConfig(
+    load_in_4bit=True,
+    bnb_4bit_use_double_quant=True,
+    bnb_4bit_quant_type="nf4",
+    bnb_4bit_compute_dtype=torch.bfloat16,
+)
+
+# Tokenizer & model
+# You must request access to the gated checkpoint
+TOKENIZER = AutoTokenizer.from_pretrained(LLAMA3_CHECKPOINT)
+MODEL = AutoModelForCausalLM.from_pretrained(
+    LLAMA3_CHECKPOINT,
+    torch_dtype=torch.bfloat16,
+    device_map="auto",
+    quantization_config=bnb_config,
+)
+TERMINATORS = [TOKENIZER.eos_token_id, TOKENIZER.convert_tokens_to_ids("<|eot_id|>")]
+
+DATA = load_dataset(DATASET_HF_NAME)["train"]
+
+TEXT_GENERATION_PIPELINE = pipeline(
+    model=MODEL,
+    tokenizer=TOKENIZER,
+    task="text-generation",
+    device_map="auto",
+)
+# NOTE: this pipeline is not used elsewhere in the commit; inference_hf.py calls MODEL.generate directly
+
+PIPELINE_INFERENCE_ARGS = {
+    "max_new_tokens": 256,
+    "eos_token_id": TERMINATORS,
+    "do_sample": True,
+    "temperature": 0.1,
+    "top_p": 0.9,
+}
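TEXT_GENERATION_PIPELINE and PIPELINE_INFERENCE_ARGS are defined above but never referenced in the rest of the commit (inference_hf.py drives MODEL.generate directly). A minimal sketch of how they could be invoked, assuming the standard Llama 3 chat template; the message contents below are placeholders, not part of the commit:

# Hypothetical usage of the otherwise-unused pipeline and its argument dict
from common import PIPELINE_INFERENCE_ARGS, TEXT_GENERATION_PIPELINE, TOKENIZER

messages = [
    {"role": "system", "content": "Responde solo con la información del contexto."},
    {"role": "user", "content": "Question: ...\nContext: ..."},
]
prompt = TOKENIZER.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
outputs = TEXT_GENERATION_PIPELINE(prompt, **PIPELINE_INFERENCE_ARGS)
print(outputs[0]["generated_text"][len(prompt):])  # drop the echoed prompt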
config.py ADDED
@@ -0,0 +1,13 @@
+FEATURE_EXTRACTOR_CHECKPOINT = "BAAI/bge-large-en-v1.5"
+DATASET_HF_NAME = "jhonparra18/reforma-pensional-col"
+LLAMA3_CHECKPOINT = "meta-llama/Meta-Llama-3-8B-Instruct"
+SYS_PROMPT_HF = """
+Eres un asistente automático que brinda información referente a la reforma
+pensional del actual gobierno, tu meta es responder a las preguntas y cuestionamientos
+en la manera más precisa y haciendo referencia a los textos de la reforma.
+Siempre responde respecto a la información que se proporciona.
+Tu respuesta jamás debe corresponder a cosas por fuera del texto que se te da.
+"""
+MAX_TOKENS_INPUT = 2000
+
+DEFAULT_BOT_MESSAGE = "Hola! Soy un chatbot construido con LLama3 para responder preguntas de la reforma pensional. Haz las preguntas que desees"
inference_hf.py ADDED
@@ -0,0 +1,47 @@
+from common import DATA, MODEL, TERMINATORS, TOKENIZER, format_prompt, search_topk
+from config import MAX_TOKENS_INPUT, SYS_PROMPT_HF
+from preprocessing import FEATURE_EXTRACTOR
+
+
+def generate(formatted_prompt):
+    formatted_prompt = formatted_prompt[:MAX_TOKENS_INPUT]  # rough cap (characters, not tokens) to avoid GPU OOM
+    messages = [
+        {"role": "system", "content": SYS_PROMPT_HF},
+        {"role": "user", "content": formatted_prompt},
+    ]
+
+    input_ids = TOKENIZER.apply_chat_template(
+        messages, add_generation_prompt=True, return_tensors="pt"
+    ).to(MODEL.device)
+    outputs = MODEL.generate(
+        input_ids,
+        max_new_tokens=512,
+        eos_token_id=TERMINATORS,
+        do_sample=True,
+        temperature=0.1,
+        top_p=0.9,
+    )
+    response = outputs[0]
+    return TOKENIZER.decode(response[input_ids.shape[-1] :], skip_special_tokens=True)
+
+
+def rag_chatbot(prompt: str, k: int = 2, return_user: bool = False):
+    _, retrieved_documents = search_topk(
+        DATA, FEATURE_EXTRACTOR, prompt, k, embedding_col="embedding"
+    )
+    formatted_prompt = format_prompt(prompt, retrieved_documents, k, text_col="chunk")
+    bot_response = generate(formatted_prompt)
+    return (
+        f"[USER]: {prompt}\n\n[ASSISTANT]: {bot_response}"
+        if return_user
+        else bot_response
+    )
+
+
+if __name__ == "__main__":
+    # Example RAG pipeline using HuggingFace
+    DATA = DATA.add_faiss_index("embedding")
+    prompt = """indicame qué va a pasar en la reforma pensional con los fondos en el pilar
+    contributivo de prima media, podré pedir el dinero de vuelta cuando tenga la edad si no
+    cumplo con las semanas cotizadas?"""
+    print(rag_chatbot(prompt, k=3, return_user=True))
preprocessing.py ADDED
@@ -0,0 +1,35 @@
+from datasets import Dataset as hfd
+from sentence_transformers import SentenceTransformer
+
+from config import FEATURE_EXTRACTOR_CHECKPOINT
+
+FEATURE_EXTRACTOR = SentenceTransformer(FEATURE_EXTRACTOR_CHECKPOINT)
+
+
+def encode_sentence(instance: hfd, text_col: str):
+    return {
+        "embedding": FEATURE_EXTRACTOR.encode(
+            instance[text_col], normalize_embeddings=True
+        )
+    }
+
+
+def parse_pdf(pdf_path: str):
+    """Gets text from a pdf file using textract (imported lazily: not in requirements.txt, only needed offline)"""
+    import textract
+    txt = textract.process(pdf_path, method="pdfminer", encoding="latin-1").decode()
+    return txt
+
+
+def chunk_text(text: str, split_sentence="ARTÍCULO"):
+    """Creates chunks of text by splitting on split_sentence"""
+    chunks = [
+        {"chunk": split_sentence + " " + c.replace("\n", " ").strip()}
+        for c in text.split(split_sentence)
+    ]
+    return chunks
+
+
+def create_df(text_chunks: list[dict[str, str]]):
+    """Creates a HuggingFace dataset from a list of dicts"""
+    return hfd.from_list(text_chunks)
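preprocessing.py only defines the helpers; the offline step that turns the reform PDF into the embedded `jhonparra18/reforma-pensional-col` dataset is not part of this commit. A rough sketch of that wiring, assuming a local PDF path (hypothetical filename) and a Hub write token:

# Hypothetical offline data-prep wiring (not included in the commit)
from preprocessing import chunk_text, create_df, encode_sentence, parse_pdf

text = parse_pdf("reforma_pensional.pdf")  # hypothetical local file
chunks = chunk_text(text, split_sentence="ARTÍCULO")
dataset = create_df(chunks)
dataset = dataset.map(lambda row: encode_sentence(row, text_col="chunk"))
# dataset.push_to_hub("jhonparra18/reforma-pensional-col")  # requires write access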
requirements.txt ADDED
@@ -0,0 +1,6 @@
+transformers
+datasets
+sentence-transformers
+faiss-cpu
+accelerate
+bitsandbytes