Upload 2 files
- app.py: +59 -16
- requirements.txt: +1 -1
app.py
CHANGED
@@ -11,9 +11,23 @@ from langchain.prompts import PromptTemplate
 st.set_page_config(page_title='preguntaDOC')
 st.header("Pregunta a tu PDF")
 
-# Field for the Hugging Face token
+# Field for the Hugging Face token
 huggingface_api_token = st.text_input('Hugging Face API Token (requerido)', type='password')
 
+# Model selection
+modelo_options = {
+    "Google FLAN-T5 Small": "google/flan-t5-small",
+    "Google FLAN-T5 Base": "google/flan-t5-base",
+    "BLOOM 560M": "bigscience/bloom-560m",
+    "BLOOM 1.1B": "bigscience/bloom-1b1",
+    "Falcon 7B Instruct": "tiiuae/falcon-7b-instruct",
+    "Gemma 2B": "google/gemma-2b",
+    "Gemma 2B Instruct": "google/gemma-2b-it"
+}
+
+selected_model = st.selectbox("Selecciona un modelo:", list(modelo_options.keys()))
+modelo_id = modelo_options[selected_model]
+
 pdf_obj = st.file_uploader("Carga tu documento", type="pdf", on_change=st.cache_resource.clear)
 
 @st.cache_resource
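Note: the hunk header above shows only the file's last import (PromptTemplate). Reconstructed from the calls visible across this diff, the imports app.py relies on plausibly look like the sketch below; PdfReader and FAISS never appear in the hunks and are assumptions, the rest are named directly in the changed code.

    import streamlit as st
    from PyPDF2 import PdfReader  # assumed: PDF text extraction inside create_embeddings
    from langchain.text_splitter import RecursiveCharacterTextSplitter  # assumed splitter
    from langchain.embeddings import HuggingFaceHubEmbeddings
    from langchain.vectorstores import FAISS  # assumed store behind similarity_search
    from langchain.llms import HuggingFaceHub
    from langchain.chains.question_answering import load_qa_chain
    from langchain.prompts import PromptTemplate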
@@ -36,8 +50,7 @@ def create_embeddings(pdf, api_token):
     )
     chunks = text_splitter.split_text(text)
 
-    # Use HuggingFaceHubEmbeddings
-    # This approach does not require sentence-transformers installed locally
+    # Use HuggingFaceHubEmbeddings
     embeddings = HuggingFaceHubEmbeddings(
         repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
         huggingfacehub_api_token=api_token
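Note: create_embeddings appears only partially in this hunk. A minimal sketch of how the visible pieces plausibly fit together; the PdfReader extraction step, the splitter settings, and the FAISS return value are assumptions, only the embeddings call is taken verbatim from the diff.

    @st.cache_resource
    def create_embeddings(pdf, api_token):
        # Read the uploaded PDF and collect its text (assumed extraction step)
        reader = PdfReader(pdf)
        text = "".join(page.extract_text() or "" for page in reader.pages)

        # Chunk the text for retrieval (sizes are illustrative, not from the diff)
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=800,
            chunk_overlap=100,
        )
        chunks = text_splitter.split_text(text)

        # Remote embeddings via the HF Inference API, as in the diff
        embeddings = HuggingFaceHubEmbeddings(
            repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            huggingfacehub_api_token=api_token,
        )

        # Assumed: FAISS index that later serves similarity_search
        return FAISS.from_texts(chunks, embeddings)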
@@ -55,22 +68,51 @@ if pdf_obj and huggingface_api_token:
     if user_question:
         docs = knowledge_base.similarity_search(user_question, 3)
 
-        # …
+        # Configure the model parameters according to the model type
+        model_kwargs = {}
+
+        # Check the model type to use the appropriate parameters
+        if "flan-t5" in modelo_id:
+            model_kwargs = {"temperature": 0.5, "max_length": 512}
+        elif "bloom" in modelo_id:
+            model_kwargs = {"temperature": 0.7, "max_length": 512}
+        elif "falcon" in modelo_id or "llama" in modelo_id or "gemma" in modelo_id:
+            model_kwargs = {"temperature": 0.1, "max_new_tokens": 512}
+        else:
+            model_kwargs = {"temperature": 0.5, "max_length": 512}
+
+        # Create the LLM with the appropriate parameters
         llm = HuggingFaceHub(
-            repo_id=…
+            repo_id=modelo_id,
             huggingfacehub_api_token=huggingface_api_token,
-            model_kwargs=…
+            model_kwargs=model_kwargs
         )
 
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
-        …
+        # A different prompt depending on the model type
+        if "falcon" in modelo_id or "llama" in modelo_id or "gemma" in modelo_id:
+            prompt_template = """
+            <|system|>
+            Responde a la siguiente pregunta basándote únicamente en el contexto proporcionado.
+            </|system|>
+
+            <|user|>
+            Contexto: {context}
+
+            Pregunta: {question}
+            </|user|>
+
+            <|assistant|>
+            """
+        else:
+            prompt_template = """
+            Responde a la siguiente pregunta basándote únicamente en el contexto proporcionado.
+
+            Contexto: {context}
+
+            Pregunta: {question}
+
+            Respuesta:
+            """
 
         PROMPT = PromptTemplate(
             template=prompt_template,
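Note: the parameter split above follows model families: FLAN-T5 is a seq2seq model and, like the BLOOM endpoints here, is driven with max_length, while the causal instruct models (Falcon, LLaMA, Gemma) cap generation with max_new_tokens and use a low temperature for more literal answers. The PromptTemplate call is cut off in the hunk; for a "stuff" chain over the {context} and {question} placeholders it plausibly continues as below (input_variables inferred from the template, not shown in the diff):

    PROMPT = PromptTemplate(
        template=prompt_template,
        input_variables=["context", "question"],  # inferred from the placeholders
    )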
@@ -79,11 +121,12 @@ if pdf_obj and huggingface_api_token:
 
         chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
 
-        with st.spinner("Procesando tu pregunta..."):
+        with st.spinner(f"Procesando tu pregunta con {selected_model}..."):
             try:
                 respuesta = chain.run(input_documents=docs, question=user_question)
                 st.write(respuesta)
             except Exception as e:
                 st.error(f"Error al procesar tu pregunta: {str(e)}")
+                st.info("Sugerencia: Intenta con un modelo diferente. Algunos modelos pueden requerir más recursos o tener limitaciones específicas.")
 elif not huggingface_api_token and pdf_obj:
     st.warning("Por favor, ingresa tu token de API de Hugging Face para continuar.")
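Note: the st.info added in the except branch tells the user to retry with a different model. A hypothetical helper sketching that suggestion in code, not part of this commit; the function name, the fallback model choice, and the fixed model_kwargs are all illustrative assumptions:

    # Hypothetical: if one endpoint fails (rate limits, model too large for the
    # free Inference API), retry the same question with a smaller fallback model.
    def answer_with_fallback(docs, question, primary_id, token,
                             fallback_id="google/flan-t5-base"):
        for repo_id in (primary_id, fallback_id):
            try:
                llm = HuggingFaceHub(
                    repo_id=repo_id,
                    huggingfacehub_api_token=token,
                    model_kwargs={"temperature": 0.5, "max_length": 512},
                )
                chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)
                return chain.run(input_documents=docs, question=question)
            except Exception:
                continue  # try the next model in the list
        raise RuntimeError("All models failed for this question")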
requirements.txt
CHANGED
@@ -8,4 +8,4 @@ faiss-cpu==1.7.4
 accelerate==0.20.3
 einops==0.6.1
 protobuf==3.20.3
-tiktoken==0.4.0
+tiktoken==0.4.0