manuelcozar55 committed on
Commit
ec25508
verified
1 Parent(s): 8db3a72

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -46
app.py CHANGED
@@ -8,6 +8,10 @@ import csv
8
  import json
9
  import os
10
  import torch
 
 
 
 
11
 
12
  huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
13
 
@@ -15,7 +19,7 @@ huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
15
  if huggingface_token:
16
  login(token=huggingface_token)
17
 
18
- # Configuración del modelo
19
  @st.cache_resource
20
  def load_llm():
21
  llm = HuggingFaceEndpoint(
@@ -39,6 +43,32 @@ classification_model, classification_tokenizer = load_classification_model()
39
 
40
  id2label = {0: "multas", 1: "politicas_de_privacidad", 2: "contratos", 3: "denuncias", 4: "otros"}
41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  def classify_text(text):
43
  inputs = classification_tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
44
  classification_model.eval()
@@ -47,7 +77,7 @@ def classify_text(text):
47
  logits = outputs.logits
48
  predicted_class_id = logits.argmax(dim=-1).item()
49
  predicted_label = id2label[predicted_class_id]
50
- return f"Clasificación: {predicted_label}\n\nDocumento:\n{text}"
51
 
52
  def translate(text, target_language):
53
  template = '''
@@ -105,61 +135,62 @@ def handle_uploaded_file(uploaded_file):
105
  return str(e)
106
 
107
  def main():
108
- st.title("LexAIcon: Traduce, resume y explica textos legales")
109
-
110
- st.header("Puedes conversar con este chatbot basado en Mistral7B-Instruct y subir archivos para que ser traducidos resumidos o explicados.")
111
-
112
  st.image("./icon.jpg", width=100)
 
 
113
 
 
 
114
 
115
- if "generated" not in st.session_state:
116
- st.session_state["generated"] = []
 
117
 
118
- # Entrada del usuario
119
- user_input = st.text_input("Tú: ", "")
120
 
121
- # Botones de Resumir, Traducir y Explicar
122
- operation = st.radio("Selecciona una operación", ["Resumir", "Traducir", "Explicar"])
 
123
 
124
- target_language = None
125
- summary_length = None
 
126
 
127
- if operation == "Traducir":
128
- target_language = st.selectbox("Selecciona el idioma de traducción", ["español", "inglés", "francés", "alemán"])
129
 
130
- if operation == "Resumir":
131
- summary_length = st.selectbox("Selecciona la longitud del resumen", ["corto", "medio", "largo"])
132
 
133
- # Manejo de archivos subidos
134
- uploaded_files = st.file_uploader("Sube un archivo", type=["txt", "pdf", "docx", "csv", "json"], accept_multiple_files=True)
135
-
136
- if st.button("Enviar"):
137
- if user_input:
138
- response = llm_engine_hf.invoke(user_input)
139
- st.session_state.generated.append({"user": user_input, "bot": response['generated_text']})
140
-
141
- if st.button("Ejecutar"):
142
- if uploaded_files:
143
  for uploaded_file in uploaded_files:
144
  file_content = handle_uploaded_file(uploaded_file)
145
- if operation == "Resumir":
146
- if summary_length == "corto":
147
- length = "de aproximadamente 50 palabras"
148
- elif summary_length == "medio":
149
- length = "de aproximadamente 100 palabras"
150
- elif summary_length == "largo":
151
- length = "de aproximadamente 500 palabras"
152
- result = summarize(file_content, length)
153
- elif operation == "Traducir":
154
- result = translate(file_content, target_language)
155
- elif operation == "Explicar":
156
- result = classify_text(file_content)
157
- st.write(result)
158
-
159
- if st.session_state.get("generated"):
160
- for chat in st.session_state["generated"]:
161
- st.write(f"Tú: {chat['user']}")
162
- st.write(f"Chatbot: {chat['bot']}")
 
 
 
 
 
 
 
163
 
164
  if __name__ == "__main__":
165
  main()
 
8
  import json
9
  import os
10
  import torch
11
+ from langchain.document_loaders import JSONLoader
12
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
13
+ from langchain.embeddings import HuggingFaceEmbeddings
14
+ from langchain.vectorstores import FAISS
15
 
16
  huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
17
 
 
19
  if huggingface_token:
20
  login(token=huggingface_token)
21
 
22
+ # Configuración del modelo de generación de texto
23
  @st.cache_resource
24
  def load_llm():
25
  llm = HuggingFaceEndpoint(
 
43
 
44
  id2label = {0: "multas", 1: "politicas_de_privacidad", 2: "contratos", 3: "denuncias", 4: "otros"}
45
 
46
+ # Cargar documentos JSON para cada categoría
47
+ @st.cache_resource
48
+ def load_json_documents():
49
+ documents = {}
50
+ categories = ["multas", "politicas_de_privacidad", "contratos", "denuncias", "otros"]
51
+ for category in categories:
52
+ with open(f"./{category}.json", "r", encoding="utf-8") as f:
53
+ data = json.load(f)["questions_and_answers"]
54
+ documents[category] = [entry["question"] + " " + entry["answer"] for entry in data]
55
+ return documents
56
+
57
+ json_documents = load_json_documents()
58
+
59
+ # Configuración de Embeddings y Vector Stores
60
+ @st.cache_resource
61
+ def create_vector_store():
62
+ embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-l6-v2", model_kwargs={"device": "cpu"})
63
+ vector_stores = {}
64
+ for category, docs in json_documents.items():
65
+ text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
66
+ split_docs = [doc for doc in text_splitter.split_text(docs)]
67
+ vector_stores[category] = FAISS.from_texts(split_docs, embeddings)
68
+ return vector_stores
69
+
70
+ vector_stores = create_vector_store()
71
+
72
  def classify_text(text):
73
  inputs = classification_tokenizer(text, return_tensors="pt", max_length=4096, truncation=True, padding="max_length")
74
  classification_model.eval()
 
77
  logits = outputs.logits
78
  predicted_class_id = logits.argmax(dim=-1).item()
79
  predicted_label = id2label[predicted_class_id]
80
+ return predicted_label
81
 
82
  def translate(text, target_language):
83
  template = '''
 
135
  return str(e)
136
 
137
  def main():
 
 
 
 
138
  st.image("./icon.jpg", width=100)
139
+ st.title("LexAIcon")
140
+ st.write("Puedes conversar con este chatbot basado en Mistral7B-Instruct y subir archivos para que el chatbot los procese.")
141
 
142
+ if "messages" not in st.session_state:
143
+ st.session_state["messages"] = [{"role": "assistant", "content": "¿Cómo puedo ayudarte?"}]
144
 
145
+ with st.sidebar:
146
+ st.text_input("HuggingFace Token", value=huggingface_token, type="password", key="huggingface_token")
147
+ st.caption("[Consigue un HuggingFace Token](https://huggingface.co/settings/tokens)")
148
 
149
+ for msg in st.session_state.messages:
150
+ st.chat_message(msg["role"]).write(msg["content"])
151
 
152
+ if prompt := st.chat_input():
153
+ st.session_state.messages.append({"role": "user", "content": prompt})
154
+ st.chat_message("user").write(prompt)
155
 
156
+ operation = st.radio("Selecciona una operación", ["Resumir", "Traducir", "Explicar"])
157
+ target_language = None
158
+ summary_length = None
159
 
160
+ if operation == "Traducir":
161
+ target_language = st.selectbox("Selecciona el idioma de traducción", ["español", "inglés", "francés", "alemán"])
162
 
163
+ if operation == "Resumir":
164
+ summary_length = st.selectbox("Selecciona la longitud del resumen", ["corto", "medio", "largo"])
165
 
166
+ if uploaded_files := st.file_uploader("Sube un archivo", type=["txt", "pdf", "docx", "csv", "json"], accept_multiple_files=True):
 
 
 
 
 
 
 
 
 
167
  for uploaded_file in uploaded_files:
168
  file_content = handle_uploaded_file(uploaded_file)
169
+ classification = classify_text(file_content)
170
+ vector_store = vector_stores[classification]
171
+ search_docs = vector_store.similarity_search(prompt)
172
+ context = " ".join([doc.page_content for doc in search_docs])
173
+ prompt_with_context = f"Contexto: {context}\n\nPregunta: {prompt}"
174
+ response = llm_engine_hf.invoke(prompt_with_context)
175
+ msg = response.content
176
+
177
+ elif operation == "Resumir":
178
+ if summary_length == "corto":
179
+ length = "de aproximadamente 50 palabras"
180
+ elif summary_length == "medio":
181
+ length = "de aproximadamente 100 palabras"
182
+ elif summary_length == "largo":
183
+ length = "de aproximadamente 500 palabras"
184
+ msg = summarize(prompt, length)
185
+
186
+ elif operation == "Traducir":
187
+ msg = translate(prompt, target_language)
188
+
189
+ else:
190
+ msg = llm_engine_hf.invoke(prompt).content
191
+
192
+ st.session_state.messages.append({"role": "assistant", "content": msg})
193
+ st.chat_message("assistant").write(msg)
194
 
195
  if __name__ == "__main__":
196
  main()