manuelcozar55 commited on
Commit
30ed7b0
verified
1 Parent(s): 1893e2e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +88 -18
app.py CHANGED
@@ -1,5 +1,5 @@
1
  import streamlit as st
2
- from transformers import AutoTokenizer
3
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
4
  from huggingface_hub import login
5
  from PyPDF2 import PdfReader
@@ -7,6 +7,7 @@ from docx import Document
7
  import csv
8
  import json
9
  import os
 
10
 
11
  huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
12
 
@@ -27,17 +28,57 @@ def load_llm():
27
 
28
  llm_engine_hf, tokenizer = load_llm()
29
 
30
- st.title("LexAIcon")
31
- st.write("Puedes conversar con este chatbot basado en Mistral7B-Instruct y subir archivos para que el chatbot los procese.")
 
 
 
 
32
 
33
- if "generated" not in st.session_state:
34
- st.session_state["generated"] = []
35
- if "past" not in st.session_state:
36
- st.session_state["past"] = []
 
 
 
 
 
 
 
 
 
37
 
38
- def generate_response(prompt):
39
- response = llm_engine_hf.invoke(prompt)
40
- return response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
41
 
42
  def handle_uploaded_file(uploaded_file):
43
  try:
@@ -65,9 +106,23 @@ def handle_uploaded_file(uploaded_file):
65
  except Exception as e:
66
  return str(e)
67
 
 
 
 
 
 
 
 
 
68
  # Entrada del usuario
69
  user_input = st.text_input("T煤: ", "")
70
 
 
 
 
 
 
 
71
  # Manejo de archivos subidos
72
  uploaded_files = st.file_uploader("Sube un archivo", type=["txt", "pdf", "docx", "csv", "json"], accept_multiple_files=True)
73
 
@@ -76,13 +131,28 @@ if st.button("Enviar"):
76
  response = generate_response(user_input)
77
  st.session_state.generated.append({"user": user_input, "bot": response})
78
 
79
- if st.session_state["generated"]:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80
  for chat in st.session_state["generated"]:
81
  st.write(f"T煤: {chat['user']}")
82
- st.write(f"Chatbot: {chat['bot']}")
83
-
84
- if uploaded_files:
85
- for uploaded_file in uploaded_files:
86
- st.write(f"Archivo subido: {uploaded_file.name}")
87
- file_content = handle_uploaded_file(uploaded_file)
88
- st.write(file_content)
 
1
  import streamlit as st
2
+ from transformers import AutoTokenizer, AutoModelForSequenceClassification
3
  from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint
4
  from huggingface_hub import login
5
  from PyPDF2 import PdfReader
 
7
  import csv
8
  import json
9
  import os
10
+ import torch
11
 
12
  huggingface_token = os.getenv('HUGGINGFACE_TOKEN')
13
 
 
28
 
29
  llm_engine_hf, tokenizer = load_llm()
30
 
31
# Classification-model setup
@st.cache_resource
def load_classification_model():
    """Load and cache the Spanish legal-document classifier.

    Returns:
        tuple: ``(model, tokenizer)`` for the
        ``mrm8488/legal-longformer-base-8192-spanish`` checkpoint.
    """
    checkpoint = "mrm8488/legal-longformer-base-8192-spanish"
    clf_tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    clf_model = AutoModelForSequenceClassification.from_pretrained(checkpoint)
    return clf_model, clf_tokenizer
37
 
38
+ classification_model, classification_tokenizer = load_classification_model()
39
+
40
+ id2label = {0: "multas", 1: "politicas_de_privacidad", 2: "contratos", 3: "denuncias", 4: "otros"}
41
+
42
def classify_text(text):
    """Classify a legal document and prepend the predicted category.

    Args:
        text: Raw document text (truncated to 4096 tokens for the model).

    Returns:
        str: ``"Clasificación: <label>"`` followed by the original document.
    """
    encoded = classification_tokenizer(
        text,
        return_tensors="pt",
        max_length=4096,
        truncation=True,
        padding="max_length",
    )
    classification_model.eval()  # inference mode: disables dropout etc.
    with torch.no_grad():
        logits = classification_model(**encoded).logits
    # argmax over the label dimension -> human-readable label via id2label.
    predicted_label = id2label[int(logits.argmax(dim=-1))]
    return f"Clasificación: {predicted_label}\n\nDocumento:\n{text}"
51
 
52
def translate(text, target_language):
    """Translate a document into *target_language* using the chat LLM.

    Args:
        text: Document text to translate.
        target_language: Target language name, interpolated into the prompt.

    Returns:
        str: The model's translation (the response ``content`` attribute).
    """
    template = '''
    Por favor, traduzca el siguiente documento al {LANGUAGE}:
    <document>
    {TEXT}
    </document>
    Asegúrese de que la traducción sea precisa y conserve el significado original del documento.
    '''
    # str.replace (not str.format) so braces inside the document text are safe.
    formatted_prompt = template.replace("{TEXT}", text).replace("{LANGUAGE}", target_language)
    # NOTE(review): the original also ran `tokenizer(formatted_prompt, ...)`
    # and discarded the result — dead work removed; only the endpoint is used.
    response = llm_engine_hf.invoke(formatted_prompt)
    return response.content
67
+
68
def summarize(text, length):
    """Summarize a document with the chat LLM.

    Args:
        text: Document text to summarize.
        length: Human-readable length hint inserted into the prompt,
            e.g. ``"de aproximadamente 100 palabras"``.

    Returns:
        str: The model's summary (the response ``content`` attribute).
    """
    template = f'''
    Por favor, haga un resumen {length} del siguiente documento:
    <document>
    {text}
    </document>
    Asegúrese de que el resumen sea conciso y conserve el significado original del documento.
    '''
    # NOTE(review): a dead `tokenizer(template, ...)` call from the original
    # was removed — its result was never used.
    response = llm_engine_hf.invoke(template)
    return response.content
82
 
83
  def handle_uploaded_file(uploaded_file):
84
  try:
 
106
  except Exception as e:
107
  return str(e)
108
 
109
# --- Page header and per-session state bootstrap ---
st.title("LexAIcon")
st.write("Puedes conversar con este chatbot basado en Mistral7B-Instruct y subir archivos para que el chatbot los procese.")

# Create the chat-history containers once per session.
for _state_key in ("generated", "past"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = []

# User input
user_input = st.text_input("Tú: ", "")

# Translation options
target_language = st.selectbox("Selecciona el idioma de traducción", ["español", "inglés", "francés", "alemán"])

# Summary options
summary_length = st.selectbox("Selecciona la longitud del resumen", ["corto", "medio", "largo"])

# Uploaded-file handling
uploaded_files = st.file_uploader("Sube un archivo", type=["txt", "pdf", "docx", "csv", "json"], accept_multiple_files=True)
128
 
 
131
  response = generate_response(user_input)
132
  st.session_state.generated.append({"user": user_input, "bot": response})
133
 
134
# Summarize / Translate / Explain actions
operation = st.radio("Selecciona una operación", ["Resumir", "Traducir", "Explicar"])

# Loop-invariant mapping from the UI choice to the prompt length hint
# (hoisted out of the per-file loop; the original rebuilt it each iteration).
_SUMMARY_LENGTHS = {
    "corto": "de aproximadamente 50 palabras",
    "medio": "de aproximadamente 100 palabras",
    "largo": "de aproximadamente 500 palabras",
}

if st.button("Ejecutar"):
    if uploaded_files:
        for uploaded_file in uploaded_files:
            file_content = handle_uploaded_file(uploaded_file)
            if operation == "Resumir":
                # .get with a fallback keeps `length` bound even if the
                # selectbox options ever drift from this mapping.
                length = _SUMMARY_LENGTHS.get(summary_length, _SUMMARY_LENGTHS["medio"])
                result = summarize(file_content, length)
            elif operation == "Traducir":
                result = translate(file_content, target_language)
            else:
                # "Explicar" — last radio option; an explicit else also
                # guarantees `result` is always bound before st.write.
                result = classify_text(file_content)
            st.write(result)
154
+
155
# Render the accumulated chat history, oldest exchange first.
history = st.session_state.get("generated")
if history:
    for exchange in history:
        st.write(f"Tú: {exchange['user']}")
        st.write(f"Chatbot: {exchange['bot']}")