lukecurtin32 committed on
Commit
7f231be
·
verified ·
1 Parent(s): 30b80f8

TranslaSite

Browse files

Add app.py with custom glossary integration

Files changed (1) hide show
  1. app.py +98 -0
app.py ADDED
@@ -0,0 +1,98 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ from transformers import pipeline
3
+ import re
4
+
5
# Initialize the translation pipeline
# Created once at module import and reused by translate_text() for every
# request. The model id suggests an English -> Spanish OPUS-MT checkpoint
# (NOTE(review): confirm against the model card).
translator = pipeline("translation", model="Helsinki-NLP/opus-mt-tc-big-en-es")

# Define the custom glossary
# Maps an English construction / project-management term (key) to the Spanish
# rendering that must appear in the output (value). preprocess_text() wraps
# each key it finds in the input as "[[key]]" before translation, and
# postprocess_text() swaps "[[key]]" for the value afterwards.
# Keys are matched as literal, case-sensitive text, so the spelling and
# capitalisation here must match what users are expected to type.
custom_glossary = {
    "Change Order": "Orden de Cambio",
    "RFI (Request for Information)": "RFI (Solicitud de Información)",
    "Submittals": "Envíos",
    "Punch List": "Lista de Reparaciones",
    "GMP (Guaranteed Maximum Price)": "GMP (Precio Máximo Garantizado)",
    "Critical Path Method (CPM)": "Método de Ruta Crítica (CPM)",
    "LEED Certification": "Certificación LEED",
    "BIM (Building Information Modeling)": "BIM (Modelado de Información de Construcción)",
    "Retainage": "Retención",
    "Precast Concrete": "Hormigón Prefabricado",
    "Greenfield Project": "Proyecto Greenfield",
    "Brownfield Project": "Proyecto Brownfield",
    "Constructability Review": "Revisión de Constructibilidad",
    "Value Engineering": "Ingeniería de Valor",
    "Design-Build": "Diseño-Construcción",
    "Cost Estimation": "Estimación de Costos",
    "Erosion and Sediment Control (ESC)": "Control de Erosión y Sedimentos (ESC)",
    "OSHA Compliance": "Cumplimiento con OSHA",
    "Project Closeout": "Cierre del Proyecto",
    "Integrated Project Delivery (IPD)": "Entrega Integrada de Proyectos (IPD)",
    "Prefabrication": "Prefabricación",
    "HVAC (Heating, Ventilation, Air Conditioning)": "HVAC (Calefacción, Ventilación y Aire Acondicionado)",
    "Sustainability": "Sostenibilidad",
    "RFP (Request for Proposal)": "RFP (Solicitud de Propuesta)",
    "Safety Data Sheet (SDS)": "Hoja de Datos de Seguridad (SDS)",
    "Load-Bearing Wall": "Muro de Carga",
    "Foundation Types": "Tipos de Cimientos",
    "Seismic Retrofitting": "Refuerzo Sísmico",
    "Fireproofing": "Protección contra Incendios",
    "Commissioning": "Puesta en Servicio",
    "Facade": "Fachada",
    "Thermal Insulation": "Aislamiento Térmico",
    "Waterproofing": "Impermeabilización",
    "Zoning": "Zonificación",
    "EPC (Engineering, Procurement, Construction)": "EPC (Ingeniería, Adquisición y Construcción)",
    "Project Milestone": "Hito del Proyecto",
    "Subcontractor": "Subcontratista",
    "Quality Assurance (QA)": "Aseguramiento de Calidad (QA)",
    "Quality Control (QC)": "Control de Calidad (QC)",
    "Baseline Schedule": "Cronograma Base",
    "Material Takeoff": "Lista de Materiales",
    "Lifecycle Costs": "Costos de Ciclo de Vida",
    "Building Envelope": "Envolvente del Edificio",
    "MEP (Mechanical, Electrical, Plumbing)": "MEP (Mecánica, Eléctrica, Fontanería)",
    "As-Built Drawings": "Planos Tal Como Construidos",
    "Hazardous Waste Management": "Gestión de Residuos Peligrosos",
    "Indoor Air Quality (IAQ)": "Calidad del Aire Interior (IAQ)",
    "Renewable Energy Systems": "Sistemas de Energía Renovable",
    "Temporary Works": "Obras Temporales",
    "Construction Waste Diversion": "Desviación de Residuos de Construcción"
}
61
+
62
def preprocess_text(text, glossary):
    """Wrap every glossary term found in *text* in a ``[[term]]`` placeholder.

    Matching is literal and case-sensitive, and a term only matches when it
    is not glued to a word character on either side (so ``Order`` does not
    match inside ``PreOrder`` or ``Orders``).

    Args:
        text: The source string to scan. Empty or ``None`` is returned as is.
        glossary: Mapping whose keys are the terms to protect. Only the keys
            are used here; empty keys are ignored.

    Returns:
        *text* with each matched term replaced by ``[[<term>]]``.
    """
    if not text or not glossary:
        return text

    # Longest terms first: at any given position the alternation then prefers
    # the longer term over a shorter one that shares its prefix.
    sorted_terms = sorted((term for term in glossary if term), key=len, reverse=True)
    if not sorted_terms:
        return text
    alternation = "|".join(re.escape(term) for term in sorted_terms)

    # Lookarounds instead of \b: \b after a term ending in ")" only matches
    # when a word character follows, so "RFI (Request for Information)" was
    # never found in ordinary text. (?<!\w)/(?!\w) express "not glued to a
    # word character" regardless of how the term itself begins or ends.
    pattern = re.compile(rf"(?<!\w)(?:{alternation})(?!\w)")

    # A single pass over the text means inserted placeholders are never
    # rescanned, so a short term cannot be wrapped again inside a longer
    # term's placeholder. A callable replacement keeps any backslashes in a
    # term from being interpreted as regex replacement escapes.
    return pattern.sub(lambda match: f"[[{match.group(0)}]]", text)
71
+
72
def postprocess_text(text, glossary):
    """Swap each ``[[term]]`` placeholder in *text* for its glossary value.

    Args:
        text: String that may contain ``[[<key>]]`` placeholders.
        glossary: Mapping from term (the text inside the brackets) to the
            string that should replace the whole placeholder.

    Returns:
        *text* with every exact ``[[<key>]]`` occurrence replaced; anything
        that is not an exact placeholder for a glossary key is left alone.
    """
    restored = text
    for source_term in glossary:
        marker = "[[" + source_term + "]]"
        # Replacing an absent marker is a no-op, so the guard only skips work.
        if marker in restored:
            restored = restored.replace(marker, glossary[source_term])
    return restored
77
+
78
def translate_text(text):
    """Translate *text* while forcing glossary terms to their fixed rendering.

    Glossary terms are wrapped in ``[[term]]`` placeholders, the result is
    run through the module-level ``translator`` pipeline, and the
    placeholders are then replaced with the glossary translations.

    Args:
        text: The input string from the UI. Empty, whitespace-only or
            ``None`` input is treated as "nothing to translate".

    Returns:
        The translated string, or ``""`` when there is nothing to translate.
    """
    # Skip the model call entirely for blank input instead of asking the
    # pipeline to translate an empty string.
    if not text or not text.strip():
        return ""

    # Preprocess the text to replace glossary terms with placeholders
    preprocessed_text = preprocess_text(text, custom_glossary)

    # The pipeline returns a list of results; the first item's
    # 'translation_text' holds the output for the single input string.
    # NOTE(review): the model sees the English term inside [[...]] and may
    # translate or re-space it, in which case the placeholder will not be
    # restored below — verify with real inputs.
    translated = translator(preprocessed_text)[0]['translation_text']

    # Postprocess the translated text to replace placeholders with actual terms
    return postprocess_text(translated, custom_glossary)
86
+
87
# Create the Gradio interface
# One multi-line text box in, one text box out; every submission is passed
# to translate_text() and its return value is shown as the result.
iface = gr.Interface(
    fn=translate_text,
    inputs=gr.Textbox(lines=5, label="Input Text (English)"),
    outputs=gr.Textbox(label="Translated Text (Spanish)"),
    title="English to Spanish Translator",
    description="Enter English text to translate it into Spanish, with accurate translation of construction and project management terms."
)

# Launch the interface
# Only start the UI when this file is run as a script, so importing the
# module does not launch it.
if __name__ == "__main__":
    iface.launch()