Spaces:
Runtime error
Runtime error
lukecurtin32
committed on
TranslaSite
Browse files
Add app.py with custom glossary integration
app.py
ADDED
@@ -0,0 +1,98 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from transformers import pipeline
|
3 |
+
import re
|
4 |
+
|
5 |
+
# Load the English -> Spanish translation pipeline once, at import time,
# so every request reuses the same model instance.
translator = pipeline(
    task="translation",
    model="Helsinki-NLP/opus-mt-tc-big-en-es",
)
|
7 |
+
|
8 |
+
# Fixed English -> Spanish renderings for construction and project-management
# terminology. Keys are the English terms looked up in the input text; values
# are the Spanish strings substituted into the translated output.
custom_glossary = {
    "Change Order": "Orden de Cambio",
    "RFI (Request for Information)": "RFI (Solicitud de Información)",
    "Submittals": "Envíos",
    "Punch List": "Lista de Reparaciones",
    "GMP (Guaranteed Maximum Price)": "GMP (Precio Máximo Garantizado)",
    "Critical Path Method (CPM)": "Método de Ruta Crítica (CPM)",
    "LEED Certification": "Certificación LEED",
    "BIM (Building Information Modeling)": "BIM (Modelado de Información de Construcción)",
    "Retainage": "Retención",
    "Precast Concrete": "Hormigón Prefabricado",
    "Greenfield Project": "Proyecto Greenfield",
    "Brownfield Project": "Proyecto Brownfield",
    "Constructability Review": "Revisión de Constructibilidad",
    "Value Engineering": "Ingeniería de Valor",
    "Design-Build": "Diseño-Construcción",
    "Cost Estimation": "Estimación de Costos",
    "Erosion and Sediment Control (ESC)": "Control de Erosión y Sedimentos (ESC)",
    "OSHA Compliance": "Cumplimiento con OSHA",
    "Project Closeout": "Cierre del Proyecto",
    "Integrated Project Delivery (IPD)": "Entrega Integrada de Proyectos (IPD)",
    "Prefabrication": "Prefabricación",
    "HVAC (Heating, Ventilation, Air Conditioning)": "HVAC (Calefacción, Ventilación y Aire Acondicionado)",
    "Sustainability": "Sostenibilidad",
    "RFP (Request for Proposal)": "RFP (Solicitud de Propuesta)",
    "Safety Data Sheet (SDS)": "Hoja de Datos de Seguridad (SDS)",
    "Load-Bearing Wall": "Muro de Carga",
    "Foundation Types": "Tipos de Cimientos",
    "Seismic Retrofitting": "Refuerzo Sísmico",
    "Fireproofing": "Protección contra Incendios",
    "Commissioning": "Puesta en Servicio",
    "Facade": "Fachada",
    "Thermal Insulation": "Aislamiento Térmico",
    "Waterproofing": "Impermeabilización",
    "Zoning": "Zonificación",
    "EPC (Engineering, Procurement, Construction)": "EPC (Ingeniería, Adquisición y Construcción)",
    "Project Milestone": "Hito del Proyecto",
    "Subcontractor": "Subcontratista",
    "Quality Assurance (QA)": "Aseguramiento de Calidad (QA)",
    "Quality Control (QC)": "Control de Calidad (QC)",
    "Baseline Schedule": "Cronograma Base",
    "Material Takeoff": "Lista de Materiales",
    "Lifecycle Costs": "Costos de Ciclo de Vida",
    "Building Envelope": "Envolvente del Edificio",
    "MEP (Mechanical, Electrical, Plumbing)": "MEP (Mecánica, Eléctrica, Fontanería)",
    "As-Built Drawings": "Planos Tal Como Construidos",
    "Hazardous Waste Management": "Gestión de Residuos Peligrosos",
    "Indoor Air Quality (IAQ)": "Calidad del Aire Interior (IAQ)",
    "Renewable Energy Systems": "Sistemas de Energía Renovable",
    "Temporary Works": "Obras Temporales",
    "Construction Waste Diversion": "Desviación de Residuos de Construcción",
}
|
61 |
+
|
62 |
+
def preprocess_text(text, glossary):
    """Wrap each glossary term found in *text* in a ``[[term]]`` placeholder.

    Matching is case-sensitive and only succeeds when the term is not glued
    to a neighbouring word character, so "Order" is not matched inside
    "Orders" or "PreOrder".

    Args:
        text: The source text to scan.
        glossary: Mapping whose keys are the terms to protect. Values are
            not used here. Empty-string keys are ignored.

    Returns:
        *text* with every matched term replaced by ``[[term]]``. Returned
        unchanged when *text* is empty or the glossary has no usable terms.
    """
    # Longest first: where two terms could start at the same position, the
    # regex alternation tries them in order, so the longer term wins.
    terms = sorted((term for term in glossary if term), key=len, reverse=True)
    if not text or not terms:
        return text

    alternation = "|".join(re.escape(term) for term in terms)
    # Lookarounds instead of \b: a trailing \b after a term that ends in a
    # non-word character such as ")" only matches when a word character
    # follows, which made terms like "RFI (Request for Information)"
    # unmatchable in normal text.
    pattern = re.compile(rf"(?<!\w)(?:{alternation})(?!\w)")

    # A single pass over the text means inserted placeholders are never
    # rescanned, so a shorter term cannot be wrapped a second time inside
    # the placeholder of a longer one.
    return pattern.sub(lambda match: f"[[{match.group(0)}]]", text)
|
71 |
+
|
72 |
+
def postprocess_text(text, glossary):
    """Swap ``[[term]]`` placeholders in *text* for their glossary values.

    Args:
        text: Text that may contain ``[[term]]`` placeholders.
        glossary: Mapping of term -> replacement string.

    Returns:
        *text* with each ``[[term]]`` whose term is a glossary key replaced
        by the corresponding value. Placeholders for unknown terms are left
        as they are.
    """
    restored = text
    for source_term in glossary:
        marker = "[[{}]]".format(source_term)
        restored = restored.replace(marker, glossary[source_term])
    return restored
|
77 |
+
|
78 |
+
def translate_text(text):
    """Translate *text* with the module's translator, applying the glossary.

    Glossary terms are first wrapped in placeholders by ``preprocess_text``,
    the result is passed to ``translator``, and the placeholders in the
    model output are then replaced with the glossary values by
    ``postprocess_text``.

    Args:
        text: The text to translate. May be empty or ``None``.

    Returns:
        The translated string, or ``""`` when *text* is empty, ``None`` or
        contains only whitespace.
    """
    # Nothing to translate: return early instead of invoking the model on
    # empty input.
    if not text or not text.strip():
        return ""

    # Shield glossary terms from the model behind [[term]] placeholders.
    protected_text = preprocess_text(text, custom_glossary)
    # The pipeline returns a list of result dicts; take the first result.
    translated = translator(protected_text)[0]["translation_text"]
    # Substitute the fixed glossary renderings for the placeholders.
    return postprocess_text(translated, custom_glossary)
|
86 |
+
|
87 |
+
# Gradio UI: one multi-line input box, one output box, wired to
# translate_text.
_english_input = gr.Textbox(lines=5, label="Input Text (English)")
_spanish_output = gr.Textbox(label="Translated Text (Spanish)")

iface = gr.Interface(
    fn=translate_text,
    inputs=_english_input,
    outputs=_spanish_output,
    title="English to Spanish Translator",
    description=(
        "Enter English text to translate it into Spanish, with accurate "
        "translation of construction and project management terms."
    ),
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()
|