Delete app.py
app.py
DELETED
@@ -1,132 +0,0 @@
import streamlit as st
import os
from PyPDF2 import PdfReader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceHubEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFaceHub
from langchain.chains.question_answering import load_qa_chain
from langchain.prompts import PromptTemplate

st.set_page_config(page_title='preguntaDOC')
st.header("Ask your PDF")

# Field for the Hugging Face token
huggingface_api_token = st.text_input('Hugging Face API Token (required)', type='password')

# Model selection
modelo_options = {
    "Google FLAN-T5 Small": "google/flan-t5-small",
    "Google FLAN-T5 Base": "google/flan-t5-base",
    "BLOOM 560M": "bigscience/bloom-560m",
    "BLOOM 1.1B": "bigscience/bloom-1b1",
    "Falcon 7B Instruct": "tiiuae/falcon-7b-instruct",
    "Gemma 2B": "google/gemma-2b",
    "Gemma 2B Instruct": "google/gemma-2b-it"
}

selected_model = st.selectbox("Select a model:", list(modelo_options.keys()))
modelo_id = modelo_options[selected_model]

pdf_obj = st.file_uploader("Upload your document", type="pdf", on_change=st.cache_resource.clear)

@st.cache_resource
def create_embeddings(pdf, api_token):
    if not api_token:
        st.error("A Hugging Face API token is required")
        return None

    os.environ["HUGGINGFACEHUB_API_TOKEN"] = api_token

    pdf_reader = PdfReader(pdf)
    text = ""
    for page in pdf_reader.pages:
        # extract_text() can return None/empty for pages without a text layer
        text += page.extract_text() or ""

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=800,
        chunk_overlap=100,
        length_function=len
    )
    chunks = text_splitter.split_text(text)

    # Use HuggingFaceHubEmbeddings
    embeddings = HuggingFaceHubEmbeddings(
        repo_id="sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
        huggingfacehub_api_token=api_token
    )

    knowledge_base = FAISS.from_texts(chunks, embeddings)
    return knowledge_base

if pdf_obj and huggingface_api_token:
    knowledge_base = create_embeddings(pdf_obj, huggingface_api_token)

    if knowledge_base:
        user_question = st.text_input("Ask a question about your PDF:")

        if user_question:
            docs = knowledge_base.similarity_search(user_question, 3)

            # Set the model parameters according to the model type
            model_kwargs = {}

            # Check the model type to pick the appropriate parameters
            if "flan-t5" in modelo_id:
                model_kwargs = {"temperature": 0.5, "max_length": 512}
            elif "bloom" in modelo_id:
                model_kwargs = {"temperature": 0.7, "max_length": 512}
            elif "falcon" in modelo_id or "llama" in modelo_id or "gemma" in modelo_id:
                model_kwargs = {"temperature": 0.1, "max_new_tokens": 512}
            else:
                model_kwargs = {"temperature": 0.5, "max_length": 512}

            # Create the LLM with the appropriate parameters
            llm = HuggingFaceHub(
                repo_id=modelo_id,
                huggingfacehub_api_token=huggingface_api_token,
                model_kwargs=model_kwargs
            )

            # Different prompt depending on the model type
            if "falcon" in modelo_id or "llama" in modelo_id or "gemma" in modelo_id:
                prompt_template = """
<|system|>
Answer the following question based only on the provided context.
</|system|>

<|user|>
Context: {context}

Question: {question}
</|user|>

<|assistant|>
"""
            else:
                prompt_template = """
Answer the following question based only on the provided context.

Context: {context}

Question: {question}

Answer:
"""

            PROMPT = PromptTemplate(
                template=prompt_template,
                input_variables=["context", "question"]
            )

            chain = load_qa_chain(llm, chain_type="stuff", prompt=PROMPT)

            with st.spinner(f"Processing your question with {selected_model}..."):
                try:
                    respuesta = chain.run(input_documents=docs, question=user_question)
                    st.write(respuesta)
                except Exception as e:
                    st.error(f"Error processing your question: {str(e)}")
                    st.info("Tip: Try a different model. Some models may require more resources or have specific limitations.")
elif not huggingface_api_token and pdf_obj:
    st.warning("Please enter your Hugging Face API token to continue.")