daniel.diaz committed on
Commit
1300f65
·
1 Parent(s): 2dc4537

DD Cambios POC

Browse files
Files changed (4) hide show
  1. Dockerfile +23 -0
  2. app.py +104 -0
  3. rag_jujutsu_poc_joblib.ipynb +479 -0
  4. requirements.txt +7 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
FROM python:3.10-slim

# Set working directory
WORKDIR /app

# Install system dependencies (compiler toolchain for Python packages that build from source)
RUN apt-get update && apt-get install -y build-essential && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is reused when only app code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy app files
COPY . .

# OPENAI_API_KEY is deliberately not declared here. It must be provided at
# runtime (e.g. Hugging Face Secrets, or `docker run -e OPENAI_API_KEY=...`).
# `ENV OPENAI_API_KEY=${OPENAI_API_KEY}` is expanded at build time, so without
# a matching ARG/ENV it would bake an empty value into the image.

# Streamlit will bind to port 7860 on all interfaces
EXPOSE 7860

# Run the Streamlit app
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0"]
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from PyPDF2 import PdfReader
3
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
4
+ from sentence_transformers import SentenceTransformer
5
+ import openai
6
+ import faiss
7
+ import numpy as np
8
+ import os
9
+ import joblib
10
+ from openai import OpenAI
11
+
12
+ client = OpenAI() # Uses env var OPENAI_API_KEY
13
+
14
@st.cache_data
def load_pdf_chunks(pdf_path):
    """Read a PDF and return its text split into overlapping chunks.

    The extracted text of every page is followed by a newline, and the
    combined text is split with a ``RecursiveCharacterTextSplitter``
    configured with ``chunk_size=500`` and ``chunk_overlap=50``.

    Args:
        pdf_path: Path of the PDF file handed to ``PdfReader``.

    Returns:
        The list of text chunks produced by the splitter.
    """
    pages = PdfReader(pdf_path).pages
    full_text = "".join(page.extract_text() + "\n" for page in pages)
    chunker = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    return chunker.split_text(full_text)
22
+
23
@st.cache_resource
def load_model_and_index(chunks):
    """Embed the chunks and build a FAISS L2 index over them.

    Loads the ``sentence-transformers/all-MiniLM-L6-v2`` model, encodes every
    chunk, adds the vectors to a ``faiss.IndexFlatL2`` and dumps the
    ``(model, chunks, index)`` tuple to ``rag_model.joblib``.

    Args:
        chunks: List of text chunks to embed and index.

    Returns:
        A ``(model, chunks, faiss_index)`` tuple.

    Raises:
        ValueError: If ``chunks`` is empty, since there is nothing to index
            (for example when no text could be extracted from the PDF).
    """
    # Fail early with a clear message instead of an opaque shape error below.
    if not chunks:
        raise ValueError(
            "No text chunks to index; the PDF may not contain extractable text."
        )

    model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
    # FAISS works on float32 matrices; normalise the dtype once here.
    embeddings = np.asarray(model.encode(chunks), dtype="float32")
    faiss_index = faiss.IndexFlatL2(embeddings.shape[1])
    faiss_index.add(embeddings)
    joblib.dump((model, chunks, faiss_index), "rag_model.joblib")
    return model, chunks, faiss_index
31
+
32
def search(query, model, chunks, index, k=3):
    """Return the chunks whose embeddings are nearest to the query.

    Args:
        query: Question text to embed.
        model: Object exposing ``encode(list_of_texts)`` that returns one
            vector per text.
        chunks: Sequence of text chunks, positionally aligned with the
            vectors stored in ``index``.
        index: Object exposing ``search(vectors, k)`` that returns a
            ``(distances, ids)`` pair, as a FAISS index does.
        k: Maximum number of chunks to return.

    Returns:
        Up to ``k`` chunks, in the order the index ranked them.
    """
    query_vec = np.asarray(model.encode([query]), dtype="float32")
    _, indices = index.search(query_vec, k)
    # FAISS pads the id row with -1 when it holds fewer than k vectors;
    # chunks[-1] would silently return the last chunk as a bogus hit.
    return [chunks[i] for i in indices[0] if i >= 0]
36
+
37
def chat_no_rag(question, max_tokens=250):
    """Send the question to ``gpt-4o`` without any retrieved context.

    Args:
        question: User question, sent as the only message.
        max_tokens: Value passed as ``max_tokens`` to the completion call.

    Returns:
        The message content of the first choice in the response.
    """
    conversation = [{"role": "user", "content": question}]
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        temperature=0.5,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
47
+
48
def chat_with_rag(question, retrieved_chunks, max_tokens=300):
    """Answer the question with ``gpt-4o`` using the retrieved chunks as context.

    The chunks are joined with newlines and embedded in a single user
    message together with the question.

    Args:
        question: User question.
        retrieved_chunks: Iterable of text chunks to use as context.
        max_tokens: Value passed as ``max_tokens`` to the completion call.

    Returns:
        The message content of the first choice in the response.
    """
    context = "\n".join(retrieved_chunks)
    user_message = {
        "role": "user",
        "content": f"Usa el siguiente contexto para responder la pregunta:\n\n{context}\n\nPregunta: {question}",
    }
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[user_message],
        temperature=0.3,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
59
+
60
def chat_with_rag_enhanced(question, retrieved_chunks, max_tokens=300):
    """Answer with ``gpt-4o`` using retrieved context and an expert-role prompt.

    Same retrieval-augmented flow as the plain variant, but the prompt first
    assigns the model a martial-history expert role and ends with an explicit
    answer cue.

    Args:
        question: User question.
        retrieved_chunks: Iterable of text chunks to use as context.
        max_tokens: Value passed as ``max_tokens`` to the completion call.

    Returns:
        The message content of the first choice in the response.
    """
    context = "\n".join(retrieved_chunks)
    role_and_instruction = (
        "Eres un experto en historia marcial. "
        "Usa el siguiente contexto histórico para responder con precisión y detalle.\n\n"
    )
    context_and_question = (
        f"Contexto:\n{context}\n\n"
        f"Pregunta: {question}\nRespuesta:"
    )
    user_message = {
        "role": "user",
        "content": role_and_instruction + context_and_question,
    }
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[user_message],
        temperature=0.2,
        max_tokens=max_tokens,
    )
    first_choice = completion.choices[0]
    return first_choice.message.content
76
+
77
# Streamlit UI: page title, one-time index construction, then three answers
# (no RAG, RAG, RAG with the enhanced prompt) for each submitted question.
st.title("📜 RAG JuJutsu Historico - ChatGPT + HF + Streamlit")

# Build the model, chunks and index only when this session has none yet.
if "model" not in st.session_state:
    with st.spinner("Cargando y procesando el PDF..."):
        pdf_chunks = load_pdf_chunks("JuJutsu-Contexto-Significado-Conexiones-Historia.pdf")
        (
            st.session_state.model,
            st.session_state.chunks,
            st.session_state.index,
        ) = load_model_and_index(pdf_chunks)

query = st.text_input("Escribe tu pregunta sobre JuJutsu histórico:")
max_tokens = st.slider("Máximo de tokens de respuesta", 50, 1000, 300, step=50)

if query:
    state = st.session_state

    # Baseline answer without retrieved context.
    st.subheader("🔹 Respuesta sin RAG:")
    st.write(chat_no_rag(query, max_tokens=max_tokens))

    # The same retrieved chunks feed both RAG variants below.
    st.subheader("🔹 Respuesta con RAG:")
    retrieved = search(query, state.model, state.chunks, state.index)
    st.write(chat_with_rag(query, retrieved, max_tokens=max_tokens))

    st.subheader("🔹 Respuesta con RAG + Mejora de Prompt:")
    st.write(chat_with_rag_enhanced(query, retrieved, max_tokens=max_tokens))
rag_jujutsu_poc_joblib.ipynb ADDED
@@ -0,0 +1,479 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "markdown",
5
+ "id": "ebeba428",
6
+ "metadata": {},
7
+ "source": [
8
+ "# ✅ RAG JuJutsu PoC (Notebook with Joblib, FAISS, ChatGPT API)"
9
+ ]
10
+ },
11
+ {
12
+ "cell_type": "code",
13
+ "execution_count": null,
14
+ "id": "8bdfd3c8",
15
+ "metadata": {},
16
+ "outputs": [
17
+ {
18
+ "name": "stdout",
19
+ "output_type": "stream",
20
+ "text": [
21
+ "Requirement already satisfied: ipywidgets==7.7.2 in /opt/anaconda3/lib/python3.11/site-packages (7.7.2)\n",
22
+ "Requirement already satisfied: ipykernel>=4.5.1 in /opt/anaconda3/lib/python3.11/site-packages (from ipywidgets==7.7.2) (6.28.0)\n",
23
+ "Requirement already satisfied: ipython-genutils~=0.2.0 in /opt/anaconda3/lib/python3.11/site-packages (from ipywidgets==7.7.2) (0.2.0)\n",
24
+ "Requirement already satisfied: traitlets>=4.3.1 in /opt/anaconda3/lib/python3.11/site-packages (from ipywidgets==7.7.2) (5.7.1)\n",
25
+ "Requirement already satisfied: widgetsnbextension~=3.6.0 in /opt/anaconda3/lib/python3.11/site-packages (from ipywidgets==7.7.2) (3.6.10)\n",
26
+ "Requirement already satisfied: ipython>=4.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from ipywidgets==7.7.2) (8.20.0)\n",
27
+ "Requirement already satisfied: jupyterlab-widgets<3,>=1.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from ipywidgets==7.7.2) (1.1.11)\n",
28
+ "Requirement already satisfied: appnope in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (0.1.2)\n",
29
+ "Requirement already satisfied: comm>=0.1.1 in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (0.1.2)\n",
30
+ "Requirement already satisfied: debugpy>=1.6.5 in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (1.6.7)\n",
31
+ "Requirement already satisfied: jupyter-client>=6.1.12 in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (8.6.0)\n",
32
+ "Requirement already satisfied: jupyter-core!=5.0.*,>=4.12 in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (5.5.0)\n",
33
+ "Requirement already satisfied: matplotlib-inline>=0.1 in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (0.1.6)\n",
34
+ "Requirement already satisfied: nest-asyncio in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (1.6.0)\n",
35
+ "Requirement already satisfied: packaging in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (23.2)\n",
36
+ "Requirement already satisfied: psutil in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (5.9.0)\n",
37
+ "Requirement already satisfied: pyzmq>=24 in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (25.1.2)\n",
38
+ "Requirement already satisfied: tornado>=6.1 in /opt/anaconda3/lib/python3.11/site-packages (from ipykernel>=4.5.1->ipywidgets==7.7.2) (6.3.3)\n",
39
+ "Requirement already satisfied: decorator in /opt/anaconda3/lib/python3.11/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2) (5.1.1)\n",
40
+ "Requirement already satisfied: jedi>=0.16 in /opt/anaconda3/lib/python3.11/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2) (0.18.1)\n",
41
+ "Requirement already satisfied: prompt-toolkit<3.1.0,>=3.0.41 in /opt/anaconda3/lib/python3.11/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2) (3.0.43)\n",
42
+ "Requirement already satisfied: pygments>=2.4.0 in /opt/anaconda3/lib/python3.11/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2) (2.15.1)\n",
43
+ "Requirement already satisfied: stack-data in /opt/anaconda3/lib/python3.11/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2) (0.2.0)\n",
44
+ "Requirement already satisfied: pexpect>4.3 in /opt/anaconda3/lib/python3.11/site-packages (from ipython>=4.0.0->ipywidgets==7.7.2) (4.8.0)\n",
45
+ "Requirement already satisfied: notebook>=4.4.1 in /opt/anaconda3/lib/python3.11/site-packages (from widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (7.3.2)\n",
46
+ "Requirement already satisfied: parso<0.9.0,>=0.8.0 in /opt/anaconda3/lib/python3.11/site-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets==7.7.2) (0.8.3)\n",
47
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets==7.7.2) (2.9.0.post0)\n",
48
+ "Requirement already satisfied: platformdirs>=2.5 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-core!=5.0.*,>=4.12->ipykernel>=4.5.1->ipywidgets==7.7.2) (3.10.0)\n",
49
+ "Requirement already satisfied: jupyter-server<3,>=2.4.0 in /opt/anaconda3/lib/python3.11/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.10.0)\n",
50
+ "Requirement already satisfied: jupyterlab-server<3,>=2.27.1 in /opt/anaconda3/lib/python3.11/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.27.3)\n",
51
+ "Requirement already satisfied: jupyterlab<4.4,>=4.3.4 in /opt/anaconda3/lib/python3.11/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (4.3.4)\n",
52
+ "Requirement already satisfied: notebook-shim<0.3,>=0.2 in /opt/anaconda3/lib/python3.11/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.2.3)\n",
53
+ "Requirement already satisfied: ptyprocess>=0.5 in /opt/anaconda3/lib/python3.11/site-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets==7.7.2) (0.7.0)\n",
54
+ "Requirement already satisfied: wcwidth in /opt/anaconda3/lib/python3.11/site-packages (from prompt-toolkit<3.1.0,>=3.0.41->ipython>=4.0.0->ipywidgets==7.7.2) (0.2.5)\n",
55
+ "Requirement already satisfied: executing in /opt/anaconda3/lib/python3.11/site-packages (from stack-data->ipython>=4.0.0->ipywidgets==7.7.2) (0.8.3)\n",
56
+ "Requirement already satisfied: asttokens in /opt/anaconda3/lib/python3.11/site-packages (from stack-data->ipython>=4.0.0->ipywidgets==7.7.2) (2.0.5)\n",
57
+ "Requirement already satisfied: pure-eval in /opt/anaconda3/lib/python3.11/site-packages (from stack-data->ipython>=4.0.0->ipywidgets==7.7.2) (0.2.2)\n",
58
+ "Requirement already satisfied: anyio>=3.1.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (4.2.0)\n",
59
+ "Requirement already satisfied: argon2-cffi in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (21.3.0)\n",
60
+ "Requirement already satisfied: jinja2 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (3.1.3)\n",
61
+ "Requirement already satisfied: jupyter-events>=0.6.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.8.0)\n",
62
+ "Requirement already satisfied: jupyter-server-terminals in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.4.4)\n",
63
+ "Requirement already satisfied: nbconvert>=6.4.4 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (7.16.3)\n",
64
+ "Requirement already satisfied: nbformat>=5.3.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (5.9.2)\n",
65
+ "Requirement already satisfied: overrides in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (7.4.0)\n",
66
+ "Requirement already satisfied: prometheus-client in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.14.1)\n",
67
+ "Requirement already satisfied: send2trash>=1.8.2 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.8.2)\n",
68
+ "Requirement already satisfied: terminado>=0.8.3 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.17.1)\n",
69
+ "Requirement already satisfied: websocket-client in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.58.0)\n",
70
+ "Requirement already satisfied: async-lru>=1.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab<4.4,>=4.3.4->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.0.4)\n",
71
+ "Requirement already satisfied: httpx>=0.25.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab<4.4,>=4.3.4->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.26.0)\n",
72
+ "Requirement already satisfied: jupyter-lsp>=2.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab<4.4,>=4.3.4->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.2.0)\n",
73
+ "Requirement already satisfied: setuptools>=40.8.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab<4.4,>=4.3.4->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (68.2.2)\n",
74
+ "Requirement already satisfied: babel>=2.10 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.11.0)\n",
75
+ "Requirement already satisfied: json5>=0.9.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.9.6)\n",
76
+ "Requirement already satisfied: jsonschema>=4.18.0 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (4.19.2)\n",
77
+ "Requirement already satisfied: requests>=2.31 in /opt/anaconda3/lib/python3.11/site-packages (from jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.32.3)\n",
78
+ "Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.11/site-packages (from python-dateutil>=2.8.2->jupyter-client>=6.1.12->ipykernel>=4.5.1->ipywidgets==7.7.2) (1.17.0)\n",
79
+ "Requirement already satisfied: idna>=2.8 in /opt/anaconda3/lib/python3.11/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (3.4)\n",
80
+ "Requirement already satisfied: sniffio>=1.1 in /opt/anaconda3/lib/python3.11/site-packages (from anyio>=3.1.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.3.0)\n",
81
+ "Requirement already satisfied: pytz>=2015.7 in /opt/anaconda3/lib/python3.11/site-packages (from babel>=2.10->jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2025.2)\n",
82
+ "Requirement already satisfied: certifi in /opt/anaconda3/lib/python3.11/site-packages (from httpx>=0.25.0->jupyterlab<4.4,>=4.3.4->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2025.1.31)\n",
83
+ "Requirement already satisfied: httpcore==1.* in /opt/anaconda3/lib/python3.11/site-packages (from httpx>=0.25.0->jupyterlab<4.4,>=4.3.4->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.0.2)\n",
84
+ "Requirement already satisfied: h11<0.15,>=0.13 in /opt/anaconda3/lib/python3.11/site-packages (from httpcore==1.*->httpx>=0.25.0->jupyterlab<4.4,>=4.3.4->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.14.0)\n",
85
+ "Requirement already satisfied: MarkupSafe>=2.0 in /opt/anaconda3/lib/python3.11/site-packages (from jinja2->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.1.3)\n",
86
+ "Requirement already satisfied: attrs>=22.2.0 in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (23.1.0)\n",
87
+ "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2023.7.1)\n",
88
+ "Requirement already satisfied: referencing>=0.28.4 in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.30.2)\n",
89
+ "Requirement already satisfied: rpds-py>=0.7.1 in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema>=4.18.0->jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.10.6)\n",
90
+ "Requirement already satisfied: python-json-logger>=2.0.4 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.0.7)\n",
91
+ "Requirement already satisfied: pyyaml>=5.3 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (6.0.1)\n",
92
+ "Requirement already satisfied: rfc3339-validator in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.1.4)\n",
93
+ "Requirement already satisfied: rfc3986-validator>=0.1.1 in /opt/anaconda3/lib/python3.11/site-packages (from jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.1.1)\n",
94
+ "Requirement already satisfied: beautifulsoup4 in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (4.12.2)\n",
95
+ "Requirement already satisfied: bleach!=5.0.0 in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (4.1.0)\n",
96
+ "Requirement already satisfied: defusedxml in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.7.1)\n",
97
+ "Requirement already satisfied: jupyterlab-pygments in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.1.2)\n",
98
+ "Requirement already satisfied: mistune<4,>=2.0.3 in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.0.4)\n",
99
+ "Requirement already satisfied: nbclient>=0.5.0 in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.8.0)\n",
100
+ "Requirement already satisfied: pandocfilters>=1.4.1 in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.5.0)\n",
101
+ "Requirement already satisfied: tinycss2 in /opt/anaconda3/lib/python3.11/site-packages (from nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.2.1)\n",
102
+ "Requirement already satisfied: fastjsonschema in /opt/anaconda3/lib/python3.11/site-packages (from nbformat>=5.3.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.16.2)\n",
103
+ "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/anaconda3/lib/python3.11/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.0.4)\n",
104
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/lib/python3.11/site-packages (from requests>=2.31->jupyterlab-server<3,>=2.27.1->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.0.7)\n",
105
+ "Requirement already satisfied: argon2-cffi-bindings in /opt/anaconda3/lib/python3.11/site-packages (from argon2-cffi->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (21.2.0)\n",
106
+ "Requirement already satisfied: webencodings in /opt/anaconda3/lib/python3.11/site-packages (from bleach!=5.0.0->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (0.5.1)\n",
107
+ "Requirement already satisfied: fqdn in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.5.1)\n",
108
+ "Requirement already satisfied: isoduration in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (20.11.0)\n",
109
+ "Requirement already satisfied: jsonpointer>1.13 in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.1)\n",
110
+ "Requirement already satisfied: uri-template in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.3.0)\n",
111
+ "Requirement already satisfied: webcolors>=1.11 in /opt/anaconda3/lib/python3.11/site-packages (from jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (24.11.1)\n",
112
+ "Requirement already satisfied: cffi>=1.0.1 in /opt/anaconda3/lib/python3.11/site-packages (from argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.16.0)\n",
113
+ "Requirement already satisfied: soupsieve>1.2 in /opt/anaconda3/lib/python3.11/site-packages (from beautifulsoup4->nbconvert>=6.4.4->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.5)\n",
114
+ "Requirement already satisfied: pycparser in /opt/anaconda3/lib/python3.11/site-packages (from cffi>=1.0.1->argon2-cffi-bindings->argon2-cffi->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (2.21)\n",
115
+ "Requirement already satisfied: arrow>=0.15.0 in /opt/anaconda3/lib/python3.11/site-packages (from isoduration->jsonschema[format-nongpl]>=4.18.0->jupyter-events>=0.6.0->jupyter-server<3,>=2.4.0->notebook>=4.4.1->widgetsnbextension~=3.6.0->ipywidgets==7.7.2) (1.2.3)\n",
116
+ "usage: jupyter [-h] [--version] [--config-dir] [--data-dir] [--runtime-dir]\n",
117
+ " [--paths] [--json] [--debug]\n",
118
+ " [subcommand]\n",
119
+ "\n",
120
+ "Jupyter: Interactive Computing\n",
121
+ "\n",
122
+ "positional arguments:\n",
123
+ " subcommand the subcommand to launch\n",
124
+ "\n",
125
+ "options:\n",
126
+ " -h, --help show this help message and exit\n",
127
+ " --version show the versions of core jupyter packages and exit\n",
128
+ " --config-dir show Jupyter config dir\n",
129
+ " --data-dir show Jupyter data dir\n",
130
+ " --runtime-dir show Jupyter runtime dir\n",
131
+ " --paths show all Jupyter paths. Add --json for machine-readable\n",
132
+ " format.\n",
133
+ " --json output paths as machine-readable json\n",
134
+ " --debug output debug information about paths\n",
135
+ "\n",
136
+ "Available subcommands: console dejavu events execute kernel kernelspec lab\n",
137
+ "labextension labhub migrate nbconvert notebook qtconsole run server\n",
138
+ "troubleshoot trust\n",
139
+ "\n",
140
+ "Jupyter command `jupyter-nbextension` not found.\n",
141
+ "\u001b[32m[I 2025-06-14 22:49:12.578 ServerApp]\u001b[m Package notebook took 0.0000s to import\n",
142
+ "\u001b[32m[I 2025-06-14 22:49:12.740 ServerApp]\u001b[m Package aext_assistant took 0.1621s to import\n",
143
+ "\u001b[32m[I 2025-06-14 22:49:12.741 ServerApp]\u001b[m Package aext_core took 0.0011s to import\n",
144
+ "\u001b[33m[W 2025-06-14 22:49:12.748 ServerApp]\u001b[m aext_panels | error adding extension (enabled: True): The module 'aext_panels' could not be found (cannot import name 'AuthConfig' from 'anaconda_cloud_auth.client' (/opt/anaconda3/lib/python3.11/site-packages/anaconda_cloud_auth/client.py)). Are you sure the extension is installed?\n",
145
+ " Traceback (most recent call last):\n",
146
+ " File \"/opt/anaconda3/lib/python3.11/site-packages/jupyter_server/extension/manager.py\", line 321, in add_extension\n",
147
+ " extpkg = ExtensionPackage(name=extension_name, enabled=enabled)\n",
148
+ " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
149
+ " File \"/opt/anaconda3/lib/python3.11/site-packages/jupyter_server/extension/manager.py\", line 185, in __init__\n",
150
+ " self._load_metadata()\n",
151
+ " File \"/opt/anaconda3/lib/python3.11/site-packages/jupyter_server/extension/manager.py\", line 200, in _load_metadata\n",
152
+ " raise ExtensionModuleNotFound(msg) from None\n",
153
+ " jupyter_server.extension.utils.ExtensionModuleNotFound: The module 'aext_panels' could not be found (cannot import name 'AuthConfig' from 'anaconda_cloud_auth.client' (/opt/anaconda3/lib/python3.11/site-packages/anaconda_cloud_auth/client.py)). Are you sure the extension is installed?\n",
154
+ "\u001b[32m[I 2025-06-14 22:49:12.750 ServerApp]\u001b[m Package aext_share_notebook took 0.0009s to import\n",
155
+ "\u001b[32m[I 2025-06-14 22:49:12.757 ServerApp]\u001b[m Package jupyter_lsp took 0.0074s to import\n",
156
+ "\u001b[33m[W 2025-06-14 22:49:12.757 ServerApp]\u001b[m A `_jupyter_server_extension_points` function was not found in jupyter_lsp. Instead, a `_jupyter_server_extension_paths` function was found and will be used for now. This function name will be deprecated in future releases of Jupyter Server.\n",
157
+ "\u001b[32m[I 2025-06-14 22:49:12.761 ServerApp]\u001b[m Package jupyter_server_terminals took 0.0035s to import\n",
158
+ "\u001b[32m[I 2025-06-14 22:49:12.761 ServerApp]\u001b[m Package jupyterlab took 0.0000s to import\n",
159
+ "\u001b[32m[I 2025-06-14 22:49:13.042 ServerApp]\u001b[m Package notebook_shim took 0.0000s to import\n",
160
+ "\u001b[33m[W 2025-06-14 22:49:13.042 ServerApp]\u001b[m A `_jupyter_server_extension_points` function was not found in notebook_shim. Instead, a `_jupyter_server_extension_paths` function was found and will be used for now. This function name will be deprecated in future releases of Jupyter Server.\n",
161
+ "\u001b[32m[I 2025-06-14 22:49:13.470 ServerApp]\u001b[m Package panel.io.jupyter_server_extension took 0.4273s to import\n",
162
+ "\u001b[32m[I 2025-06-14 22:49:13.471 ServerApp]\u001b[m aext_assistant | extension was successfully linked.\n",
163
+ "\u001b[32m[I 2025-06-14 22:49:13.471 ServerApp]\u001b[m aext_core | extension was successfully linked.\n",
164
+ "\u001b[32m[I 2025-06-14 22:49:13.471 ServerApp]\u001b[m aext_share_notebook | extension was successfully linked.\n",
165
+ "\u001b[32m[I 2025-06-14 22:49:13.471 ServerApp]\u001b[m jupyter_lsp | extension was successfully linked.\n",
166
+ "\u001b[32m[I 2025-06-14 22:49:13.473 ServerApp]\u001b[m jupyter_server_terminals | extension was successfully linked.\n",
167
+ "\u001b[32m[I 2025-06-14 22:49:13.474 ServerApp]\u001b[m jupyterlab | extension was successfully linked.\n",
168
+ "\u001b[32m[I 2025-06-14 22:49:13.475 ServerApp]\u001b[m notebook | extension was successfully linked.\n",
169
+ "\u001b[32m[I 2025-06-14 22:49:13.564 ServerApp]\u001b[m notebook_shim | extension was successfully linked.\n",
170
+ "\u001b[32m[I 2025-06-14 22:49:13.564 ServerApp]\u001b[m panel.io.jupyter_server_extension | extension was successfully linked.\n",
171
+ "\u001b[32m[I 2025-06-14 22:49:13.584 ServerApp]\u001b[m notebook_shim | extension was successfully loaded.\n",
172
+ "\u001b[32m[I 2025-06-14 22:49:13.585 ServerApp]\u001b[m Registered aext_assistant server extension\n",
173
+ "\u001b[32m[I 2025-06-14 22:49:13.585 ServerApp]\u001b[m aext_assistant | extension was successfully loaded.\n",
174
+ "\u001b[32m[I 2025-06-14 22:49:13.585 ServerApp]\u001b[m Registered aext_core server extension\n",
175
+ "\u001b[32m[I 2025-06-14 22:49:13.585 ServerApp]\u001b[m aext_core | extension was successfully loaded.\n",
176
+ "\u001b[32m[I 2025-06-14 22:49:13.585 ServerApp]\u001b[m Registered aext_share_notebook_server server extension\n",
177
+ "\u001b[32m[I 2025-06-14 22:49:13.585 ServerApp]\u001b[m aext_share_notebook | extension was successfully loaded.\n",
178
+ "\u001b[32m[I 2025-06-14 22:49:13.586 ServerApp]\u001b[m jupyter_lsp | extension was successfully loaded.\n",
179
+ "\u001b[32m[I 2025-06-14 22:49:13.586 ServerApp]\u001b[m jupyter_server_terminals | extension was successfully loaded.\n",
180
+ "\u001b[32m[I 2025-06-14 22:49:13.588 LabApp]\u001b[m JupyterLab extension loaded from /opt/anaconda3/lib/python3.11/site-packages/jupyterlab\n",
181
+ "\u001b[32m[I 2025-06-14 22:49:13.588 LabApp]\u001b[m JupyterLab application directory is /opt/anaconda3/share/jupyter/lab\n",
182
+ "\u001b[32m[I 2025-06-14 22:49:13.589 LabApp]\u001b[m Extension Manager is 'pypi'.\n",
183
+ "\u001b[32m[I 2025-06-14 22:49:13.606 ServerApp]\u001b[m jupyterlab | extension was successfully loaded.\n",
184
+ "\u001b[32m[I 2025-06-14 22:49:13.608 ServerApp]\u001b[m notebook | extension was successfully loaded.\n",
185
+ "\u001b[32m[I 2025-06-14 22:49:13.608 ServerApp]\u001b[m panel.io.jupyter_server_extension | extension was successfully loaded.\n",
186
+ "\u001b[32m[I 2025-06-14 22:49:13.609 ServerApp]\u001b[m The port 8888 is already in use, trying another port.\n",
187
+ "\u001b[32m[I 2025-06-14 22:49:13.609 ServerApp]\u001b[m The port 8889 is already in use, trying another port.\n",
188
+ "\u001b[32m[I 2025-06-14 22:49:13.609 ServerApp]\u001b[m The port 8890 is already in use, trying another port.\n",
189
+ "\u001b[32m[I 2025-06-14 22:49:13.610 ServerApp]\u001b[m Serving notebooks from local directory: /Users/ddiaz/Desktop/Proyectos_ImageMarker/POC-JuJitsu\n",
190
+ "\u001b[32m[I 2025-06-14 22:49:13.610 ServerApp]\u001b[m Jupyter Server 2.10.0 is running at:\n",
191
+ "\u001b[32m[I 2025-06-14 22:49:13.610 ServerApp]\u001b[m http://localhost:8891/tree?token=da5dcdce5100318c25020e1cce57e2b157f8243ee0051999\n",
192
+ "\u001b[32m[I 2025-06-14 22:49:13.610 ServerApp]\u001b[m http://127.0.0.1:8891/tree?token=da5dcdce5100318c25020e1cce57e2b157f8243ee0051999\n",
193
+ "\u001b[32m[I 2025-06-14 22:49:13.610 ServerApp]\u001b[m Use Control-C to stop this server and shut down all kernels (twice to skip confirmation).\n",
194
+ "\u001b[35m[C 2025-06-14 22:49:13.611 ServerApp]\u001b[m \n",
195
+ " \n",
196
+ " To access the server, open this file in a browser:\n",
197
+ " file:///Users/ddiaz/Library/Jupyter/runtime/jpserver-72351-open.html\n",
198
+ " Or copy and paste one of these URLs:\n",
199
+ " http://localhost:8891/tree?token=da5dcdce5100318c25020e1cce57e2b157f8243ee0051999\n",
200
+ " http://127.0.0.1:8891/tree?token=da5dcdce5100318c25020e1cce57e2b157f8243ee0051999\n",
201
+ "\u001b[32m[I 2025-06-14 22:49:13.912 ServerApp]\u001b[m Skipped non-installed server(s): bash-language-server, dockerfile-language-server-nodejs, javascript-typescript-langserver, jedi-language-server, julia-language-server, pyright, python-language-server, r-languageserver, sql-language-server, texlab, typescript-language-server, unified-language-server, vscode-css-languageserver-bin, vscode-html-languageserver-bin, vscode-json-languageserver-bin, yaml-language-server\n",
202
+ "0.00s - Debugger warning: It seems that frozen modules are being used, which may\n",
203
+ "0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off\n",
204
+ "0.00s - to python to disable frozen modules.\n",
205
+ "0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.\n",
206
+ "\u001b[33m[W 2025-06-14 22:49:15.430 ServerApp]\u001b[m wrote error: 'Forbidden'\n",
207
+ " Traceback (most recent call last):\n",
208
+ " File \"/opt/anaconda3/lib/python3.11/site-packages/tornado/web.py\", line 1786, in _execute\n",
209
+ " result = await result\n",
210
+ " ^^^^^^^^^^^^\n",
211
+ " File \"/opt/anaconda3/lib/python3.11/site-packages/aext_assistant_server/handlers.py\", line 117, in get\n",
212
+ " raise HTTPError(403, reason=\"missing nucleus_token\")\n",
213
+ " tornado.web.HTTPError: HTTP 403: missing nucleus_token\n",
214
+ "\u001b[33m[W 2025-06-14 22:49:15.432 ServerApp]\u001b[m 403 GET /aext_assistant_server/nucleus_token?1749962955366 (e1bb6698e462478ab1b1bdda87374748@::1) 2.68ms referer=http://localhost:8891/tree\n",
215
+ "\u001b[33m[W 2025-06-14 22:49:31.915 ServerApp]\u001b[m wrote error: 'Forbidden'\n",
216
+ " Traceback (most recent call last):\n",
217
+ " File \"/opt/anaconda3/lib/python3.11/site-packages/tornado/web.py\", line 1786, in _execute\n",
218
+ " result = await result\n",
219
+ " ^^^^^^^^^^^^\n",
220
+ " File \"/opt/anaconda3/lib/python3.11/site-packages/aext_assistant_server/handlers.py\", line 117, in get\n",
221
+ " raise HTTPError(403, reason=\"missing nucleus_token\")\n",
222
+ " tornado.web.HTTPError: HTTP 403: missing nucleus_token\n",
223
+ "\u001b[33m[W 2025-06-14 22:49:31.915 ServerApp]\u001b[m 403 GET /aext_assistant_server/nucleus_token?1749962971861 (e1bb6698e462478ab1b1bdda87374748@::1) 1.29ms referer=http://localhost:8891/tree\n"
224
+ ]
225
+ }
226
+ ],
227
+ "source": [
228
+ "\n",
229
+ "!pip install --quiet openai langchain faiss-cpu PyPDF2 sentence-transformers joblib\n",
230
+ "!pip install ipywidgets==7.7.2\n",
231
+ "!jupyter nbextension enable --py widgetsnbextension\n",
232
+ "!jupyter notebook\n"
233
+ ]
234
+ },
235
+ {
236
+ "cell_type": "code",
237
+ "execution_count": 1,
238
+ "id": "49ee7721",
239
+ "metadata": {},
240
+ "outputs": [
241
+ {
242
+ "name": "stdout",
243
+ "output_type": "stream",
244
+ "text": [
245
+ "Loaded 329 chunks\n"
246
+ ]
247
+ }
248
+ ],
249
+ "source": [
250
+ "\n",
251
+ "from PyPDF2 import PdfReader\n",
252
+ "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
253
+ "\n",
254
+ "def load_pdf_chunks(pdf_path):\n",
255
+ " reader = PdfReader(pdf_path)\n",
256
+ " raw_text = \"\"\n",
257
+ " for page in reader.pages:\n",
258
+ " raw_text += page.extract_text() + \"\\n\"\n",
259
+ "\n",
260
+ " splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)\n",
261
+ " return splitter.split_text(raw_text)\n",
262
+ "\n",
263
+ "chunks = load_pdf_chunks(\"JuJutsu-Contexto-Significado-Conexiones-Historia.pdf\")\n",
264
+ "print(f\"Loaded {len(chunks)} chunks\")\n"
265
+ ]
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": 3,
270
+ "id": "371c637e",
271
+ "metadata": {},
272
+ "outputs": [
273
+ {
274
+ "name": "stdout",
275
+ "output_type": "stream",
276
+ "text": [
277
+ "Model, chunks, and index serialized to rag_model.joblib\n"
278
+ ]
279
+ }
280
+ ],
281
+ "source": [
282
+ "\n",
283
+ "from sentence_transformers import SentenceTransformer\n",
284
+ "import faiss\n",
285
+ "import numpy as np\n",
286
+ "import joblib\n",
287
+ "\n",
288
+ "model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n",
289
+ "embeddings = model.encode(chunks)\n",
290
+ "\n",
291
+ "index = faiss.IndexFlatL2(embeddings.shape[1])\n",
292
+ "index.add(np.array(embeddings))\n",
293
+ "\n",
294
+ "joblib.dump((model, chunks, index), \"rag_model.joblib\")\n",
295
+ "print(\"Model, chunks, and index serialized to rag_model.joblib\")\n"
296
+ ]
297
+ },
298
+ {
299
+ "cell_type": "code",
300
+ "execution_count": 5,
301
+ "id": "28ce4963",
302
+ "metadata": {},
303
+ "outputs": [
304
+ {
305
+ "name": "stdout",
306
+ "output_type": "stream",
307
+ "text": [
308
+ "Model, chunks, and index loaded from rag_model.joblib\n"
309
+ ]
310
+ }
311
+ ],
312
+ "source": [
313
+ "\n",
314
+ "import joblib\n",
315
+ "\n",
316
+ "model, chunks, index = joblib.load(\"rag_model.joblib\")\n",
317
+ "print(\"Model, chunks, and index loaded from rag_model.joblib\")\n"
318
+ ]
319
+ },
320
+ {
321
+ "cell_type": "code",
322
+ "execution_count": 7,
323
+ "id": "51a89e77",
324
+ "metadata": {},
325
+ "outputs": [],
326
+ "source": [
327
+ "\n",
328
+ "def search(query, k=3):\n",
329
+ " query_vec = model.encode([query])\n",
330
+ " scores, indices = index.search(np.array(query_vec), k)\n",
331
+ " return [chunks[i] for i in indices[0]]\n"
332
+ ]
333
+ },
334
+ {
335
+ "cell_type": "code",
336
+ "execution_count": 23,
337
+ "id": "34315775",
338
+ "metadata": {},
339
+ "outputs": [],
340
+ "source": [
341
+ "\n",
342
+ "import os\n",
343
+ "\n",
344
+ "import openai\n",
345
+ "from openai import OpenAI\n",
346
+ "\n",
347
+ "os.environ[\"OPENAI_API_KEY\"] = \"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\"\n",
348
+ "\n",
349
+ "\n",
350
+ "client = OpenAI() # Uses env variable OPENAI_API_KEY\n",
351
+ "\n",
352
+ "def chat_no_rag(question):\n",
353
+ " response = client.chat.completions.create(\n",
354
+ " model=\"gpt-4o\",\n",
355
+ " messages=[\n",
356
+ " {\"role\": \"user\", \"content\": question}\n",
357
+ " ],\n",
358
+ " temperature=0.5,\n",
359
+ " max_tokens=200, # 👈 Limit output to ~150-200 words\n",
360
+ "\n",
361
+ " )\n",
362
+ " return response.choices[0].message.content\n",
363
+ "\n",
364
+ "def chat_with_rag(question, retrieved_chunks):\n",
365
+ " context = \"\\n\".join(retrieved_chunks)\n",
366
+ " prompt = f\"Usa el siguiente contexto para responder la pregunta:\\n\\n{context}\\n\\nPregunta: {question}\"\n",
367
+ "\n",
368
+ " response = client.chat.completions.create(\n",
369
+ " model=\"gpt-4o\",\n",
370
+ " messages=[\n",
371
+ " {\"role\": \"user\", \"content\": prompt}\n",
372
+ " ],\n",
373
+ " temperature=0.3,\n",
374
+ " max_tokens=200, # 👈 Limit output to ~150-200 words\n",
375
+ " )\n",
376
+ " return response.choices[0].message.content\n",
377
+ "\n",
378
+ "\n",
379
+ "def chat_with_rag_enhanced(question, retrieved_chunks):\n",
380
+ " context = \"\\n\".join(retrieved_chunks)\n",
381
+ " prompt = (\n",
382
+ " \"Eres un experto en historia marcial. \"\n",
383
+ " \"Usa el siguiente contexto histórico para responder con precisión y detalle.\\n\\n\"\n",
384
+ " f\"Contexto:\\n{context}\\n\\n\"\n",
385
+ " f\"Pregunta: {question}\\nRespuesta:\"\n",
386
+ " )\n",
387
+ "\n",
388
+ " response = client.chat.completions.create(\n",
389
+ " model=\"gpt-4o\",\n",
390
+ " messages=[\n",
391
+ " {\"role\": \"user\", \"content\": prompt}\n",
392
+ " ],\n",
393
+ " temperature=0.2,\n",
394
+ " max_tokens=200, # 👈 Limit output to ~150-200 words\n",
395
+ " )\n",
396
+ " return response.choices[0].message.content\n",
397
+ "\n",
398
+ "\n",
399
+ "\n",
400
+ "\n"
401
+ ]
402
+ },
403
+ {
404
+ "cell_type": "code",
405
+ "execution_count": 25,
406
+ "id": "900dfdfa",
407
+ "metadata": {},
408
+ "outputs": [
409
+ {
410
+ "name": "stdout",
411
+ "output_type": "stream",
412
+ "text": [
413
+ "🔹 Sin RAG:\n"
414
+ ]
415
+ },
416
+ {
417
+ "ename": "RateLimitError",
418
+ "evalue": "Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}",
419
+ "output_type": "error",
420
+ "traceback": [
421
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
422
+ "\u001b[0;31mRateLimitError\u001b[0m Traceback (most recent call last)",
423
+ "Cell \u001b[0;32mIn[25], line 6\u001b[0m\n\u001b[1;32m 3\u001b[0m retrieved \u001b[38;5;241m=\u001b[39m search(query)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m🔹 Sin RAG:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m----> 6\u001b[0m \u001b[38;5;28mprint\u001b[39m(chat_no_rag(query))\n\u001b[1;32m 8\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m🔹 Con RAG:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 9\u001b[0m \u001b[38;5;28mprint\u001b[39m(chat_with_rag(query, retrieved))\n",
424
+ "Cell \u001b[0;32mIn[23], line 12\u001b[0m, in \u001b[0;36mchat_no_rag\u001b[0;34m(question)\u001b[0m\n\u001b[1;32m 11\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mchat_no_rag\u001b[39m(question):\n\u001b[0;32m---> 12\u001b[0m response \u001b[38;5;241m=\u001b[39m client\u001b[38;5;241m.\u001b[39mchat\u001b[38;5;241m.\u001b[39mcompletions\u001b[38;5;241m.\u001b[39mcreate(\n\u001b[1;32m 13\u001b[0m model\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgpt-4o\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 14\u001b[0m messages\u001b[38;5;241m=\u001b[39m[\n\u001b[1;32m 15\u001b[0m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrole\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muser\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m: question}\n\u001b[1;32m 16\u001b[0m ],\n\u001b[1;32m 17\u001b[0m temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.5\u001b[39m,\n\u001b[1;32m 18\u001b[0m )\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m response\u001b[38;5;241m.\u001b[39mchoices[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mmessage\u001b[38;5;241m.\u001b[39mcontent\n",
425
+ "File \u001b[0;32m/opt/anaconda3/lib/python3.11/site-packages/openai/_utils/_utils.py:287\u001b[0m, in \u001b[0;36mrequired_args.<locals>.inner.<locals>.wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 285\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 286\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 287\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m func(\u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n",
426
+ "File \u001b[0;32m/opt/anaconda3/lib/python3.11/site-packages/openai/resources/chat/completions/completions.py:925\u001b[0m, in \u001b[0;36mCompletions.create\u001b[0;34m(self, messages, model, audio, frequency_penalty, function_call, functions, logit_bias, logprobs, max_completion_tokens, max_tokens, metadata, modalities, n, parallel_tool_calls, prediction, presence_penalty, reasoning_effort, response_format, seed, service_tier, stop, store, stream, stream_options, temperature, tool_choice, tools, top_logprobs, top_p, user, web_search_options, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 882\u001b[0m \u001b[38;5;129m@required_args\u001b[39m([\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m], [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[1;32m 883\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mcreate\u001b[39m(\n\u001b[1;32m 884\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 922\u001b[0m timeout: \u001b[38;5;28mfloat\u001b[39m \u001b[38;5;241m|\u001b[39m httpx\u001b[38;5;241m.\u001b[39mTimeout \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m|\u001b[39m NotGiven \u001b[38;5;241m=\u001b[39m NOT_GIVEN,\n\u001b[1;32m 923\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ChatCompletion \u001b[38;5;241m|\u001b[39m Stream[ChatCompletionChunk]:\n\u001b[1;32m 924\u001b[0m validate_response_format(response_format)\n\u001b[0;32m--> 925\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_post(\n\u001b[1;32m 926\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m/chat/completions\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 927\u001b[0m body\u001b[38;5;241m=\u001b[39mmaybe_transform(\n\u001b[1;32m 928\u001b[0m {\n\u001b[1;32m 929\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmessages\u001b[39m\u001b[38;5;124m\"\u001b[39m: messages,\n\u001b[1;32m 930\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m: model,\n\u001b[1;32m 931\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maudio\u001b[39m\u001b[38;5;124m\"\u001b[39m: audio,\n\u001b[1;32m 932\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfrequency_penalty\u001b[39m\u001b[38;5;124m\"\u001b[39m: frequency_penalty,\n\u001b[1;32m 933\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfunction_call\u001b[39m\u001b[38;5;124m\"\u001b[39m: function_call,\n\u001b[1;32m 934\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfunctions\u001b[39m\u001b[38;5;124m\"\u001b[39m: functions,\n\u001b[1;32m 935\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogit_bias\u001b[39m\u001b[38;5;124m\"\u001b[39m: logit_bias,\n\u001b[1;32m 936\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlogprobs\u001b[39m\u001b[38;5;124m\"\u001b[39m: logprobs,\n\u001b[1;32m 937\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_completion_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m: max_completion_tokens,\n\u001b[1;32m 938\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmax_tokens\u001b[39m\u001b[38;5;124m\"\u001b[39m: max_tokens,\n\u001b[1;32m 939\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmetadata\u001b[39m\u001b[38;5;124m\"\u001b[39m: metadata,\n\u001b[1;32m 940\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodalities\u001b[39m\u001b[38;5;124m\"\u001b[39m: modalities,\n\u001b[1;32m 941\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn\u001b[39m\u001b[38;5;124m\"\u001b[39m: n,\n\u001b[1;32m 942\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparallel_tool_calls\u001b[39m\u001b[38;5;124m\"\u001b[39m: parallel_tool_calls,\n\u001b[1;32m 943\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprediction\u001b[39m\u001b[38;5;124m\"\u001b[39m: prediction,\n\u001b[1;32m 944\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpresence_penalty\u001b[39m\u001b[38;5;124m\"\u001b[39m: presence_penalty,\n\u001b[1;32m 945\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mreasoning_effort\u001b[39m\u001b[38;5;124m\"\u001b[39m: reasoning_effort,\n\u001b[1;32m 946\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mresponse_format\u001b[39m\u001b[38;5;124m\"\u001b[39m: response_format,\n\u001b[1;32m 947\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mseed\u001b[39m\u001b[38;5;124m\"\u001b[39m: seed,\n\u001b[1;32m 948\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mservice_tier\u001b[39m\u001b[38;5;124m\"\u001b[39m: service_tier,\n\u001b[1;32m 949\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstop\u001b[39m\u001b[38;5;124m\"\u001b[39m: stop,\n\u001b[1;32m 950\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstore\u001b[39m\u001b[38;5;124m\"\u001b[39m: store,\n\u001b[1;32m 951\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream\u001b[39m\u001b[38;5;124m\"\u001b[39m: stream,\n\u001b[1;32m 952\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mstream_options\u001b[39m\u001b[38;5;124m\"\u001b[39m: stream_options,\n\u001b[1;32m 953\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtemperature\u001b[39m\u001b[38;5;124m\"\u001b[39m: temperature,\n\u001b[1;32m 954\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtool_choice\u001b[39m\u001b[38;5;124m\"\u001b[39m: tool_choice,\n\u001b[1;32m 955\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtools\u001b[39m\u001b[38;5;124m\"\u001b[39m: tools,\n\u001b[1;32m 956\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop_logprobs\u001b[39m\u001b[38;5;124m\"\u001b[39m: 
top_logprobs,\n\u001b[1;32m 957\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtop_p\u001b[39m\u001b[38;5;124m\"\u001b[39m: top_p,\n\u001b[1;32m 958\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muser\u001b[39m\u001b[38;5;124m\"\u001b[39m: user,\n\u001b[1;32m 959\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mweb_search_options\u001b[39m\u001b[38;5;124m\"\u001b[39m: web_search_options,\n\u001b[1;32m 960\u001b[0m },\n\u001b[1;32m 961\u001b[0m completion_create_params\u001b[38;5;241m.\u001b[39mCompletionCreateParamsStreaming\n\u001b[1;32m 962\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m stream\n\u001b[1;32m 963\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m completion_create_params\u001b[38;5;241m.\u001b[39mCompletionCreateParamsNonStreaming,\n\u001b[1;32m 964\u001b[0m ),\n\u001b[1;32m 965\u001b[0m options\u001b[38;5;241m=\u001b[39mmake_request_options(\n\u001b[1;32m 966\u001b[0m extra_headers\u001b[38;5;241m=\u001b[39mextra_headers, extra_query\u001b[38;5;241m=\u001b[39mextra_query, extra_body\u001b[38;5;241m=\u001b[39mextra_body, timeout\u001b[38;5;241m=\u001b[39mtimeout\n\u001b[1;32m 967\u001b[0m ),\n\u001b[1;32m 968\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mChatCompletion,\n\u001b[1;32m 969\u001b[0m stream\u001b[38;5;241m=\u001b[39mstream \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28;01mFalse\u001b[39;00m,\n\u001b[1;32m 970\u001b[0m stream_cls\u001b[38;5;241m=\u001b[39mStream[ChatCompletionChunk],\n\u001b[1;32m 971\u001b[0m )\n",
427
+ "File \u001b[0;32m/opt/anaconda3/lib/python3.11/site-packages/openai/_base_client.py:1242\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1228\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1229\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1230\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1237\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1238\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1239\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1240\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1241\u001b[0m )\n\u001b[0;32m-> 1242\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mrequest(cast_to, opts, stream\u001b[38;5;241m=\u001b[39mstream, stream_cls\u001b[38;5;241m=\u001b[39mstream_cls))\n",
428
+ "File \u001b[0;32m/opt/anaconda3/lib/python3.11/site-packages/openai/_base_client.py:1037\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1034\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1036\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1037\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m response \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcould not resolve response (should never happen)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
429
+ "\u001b[0;31mRateLimitError\u001b[0m: Error code: 429 - {'error': {'message': 'You exceeded your current quota, please check your plan and billing details. For more information on this error, read the docs: https://platform.openai.com/docs/guides/error-codes/api-errors.', 'type': 'insufficient_quota', 'param': None, 'code': 'insufficient_quota'}}"
430
+ ]
431
+ }
432
+ ],
433
+ "source": [
434
+ "\n",
435
+ "# Example query\n",
436
+ "query = \"¿Cuál es el origen del JuJutsu en Japón?\"\n",
437
+ "retrieved = search(query)\n",
438
+ "\n",
439
+ "print(\"🔹 Sin RAG:\")\n",
440
+ "print(chat_no_rag(query))\n",
441
+ "\n",
442
+ "print(\"\\n🔹 Con RAG:\")\n",
443
+ "print(chat_with_rag(query, retrieved))\n",
444
+ "\n",
445
+ "print(\"\\n🔹 Con RAG + Prompt mejorado:\")\n",
446
+ "print(chat_with_rag_enhanced(query, retrieved))\n"
447
+ ]
448
+ },
449
+ {
450
+ "cell_type": "code",
451
+ "execution_count": null,
452
+ "id": "0b852f41-946b-4684-b3c8-5ee2b416710b",
453
+ "metadata": {},
454
+ "outputs": [],
455
+ "source": []
456
+ }
457
+ ],
458
+ "metadata": {
459
+ "kernelspec": {
460
+ "display_name": "Python 3 (ipykernel)",
461
+ "language": "python",
462
+ "name": "python3"
463
+ },
464
+ "language_info": {
465
+ "codemirror_mode": {
466
+ "name": "ipython",
467
+ "version": 3
468
+ },
469
+ "file_extension": ".py",
470
+ "mimetype": "text/x-python",
471
+ "name": "python",
472
+ "nbconvert_exporter": "python",
473
+ "pygments_lexer": "ipython3",
474
+ "version": "3.11.7"
475
+ }
476
+ },
477
+ "nbformat": 4,
478
+ "nbformat_minor": 5
479
+ }
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ openai
2
+ streamlit
3
+ langchain
4
+ PyPDF2
5
+ faiss-cpu
6
+ sentence-transformers
7
+ joblib