# TranslaSite / app.py
# Web-view residue: last change "Update app.py" (commit 5f80acb, verified), shown next to user lukecurtin32; file size 6 kB.
import gradio as gr
import re
import torch
from transformers import pipeline
# Initialize the translation pipeline
def initialize_translator():
    """Create the English-to-Spanish translation pipeline.

    Returns:
        The object returned by ``pipeline("translation", ...)`` for the
        ``Helsinki-NLP/opus-mt-tc-big-en-es`` model.

    Raises:
        RuntimeError: If building the pipeline fails for any reason. The
            original exception is attached as ``__cause__`` so the full
            traceback of the underlying failure is preserved.
    """
    try:
        # Use GPU (device index 0) if available, otherwise CPU (-1).
        device = 0 if torch.cuda.is_available() else -1
        return pipeline(
            "translation",
            model="Helsinki-NLP/opus-mt-tc-big-en-es",
            device=device,
        )
    except Exception as e:
        # Explicit chaining marks the low-level error as the direct cause.
        raise RuntimeError(f"Failed to initialize translation pipeline: {e}") from e
# Build the pipeline once at import time; translate_text() calls this shared instance.
translator = initialize_translator()
# Custom glossary: English construction / project-management term -> fixed
# Spanish rendering. preprocess_text() looks the keys up in the input and
# postprocess_text() substitutes the values, walking the dict in this order.
custom_glossary = {
    "Change Order": "Orden de Cambio",
    "RFI (Request for Information)": "RFI (Solicitud de Información)",
    "Submittals": "Envíos",
    "Punch List": "Lista de Reparaciones",
    "GMP (Guaranteed Maximum Price)": "GMP (Precio Máximo Garantizado)",
    "Critical Path Method (CPM)": "Método de Ruta Crítica (CPM)",
    "LEED Certification": "Certificación LEED",
    "BIM (Building Information Modeling)": "BIM (Modelado de Información de Construcción)",
    "Retainage": "Retención",
    "Precast Concrete": "Hormigón Prefabricado",
    "Aggregate": "Áridos",
    "Allowance": "Asignación",
    "Apprenticeship": "Aprendizaje",
    "Backfilling": "Relleno",
    "Beam": "Viga",
    "Blueprint": "Plano",
    "Bollard": "Bolardo",
    "Bracing": "Arriostramiento",
    "Brick Masonry": "Albañilería de Ladrillo",
    "Budget Overrun": "Sobrecosto Presupuestario",
    "Building Code": "Código de Construcción",
    "Cantilever": "Voladizo",
    "Caulking": "Sellado",
    "Ceiling Joist": "Vigueta de Techo",
    "Cement": "Cemento",
    "Certificate of Occupancy": "Certificado de Ocupación",
    "Chain Link Fence": "Cerca de Malla",
    "Chipping": "Picado",
    "Cladding": "Revestimiento",
    "Clear Span": "Luz Libre",
    "Construction Drawings": "Planos de Construcción",
    "Construction Schedule": "Cronograma de Construcción",
    "Construction Staging": "Escenario de Construcción",
    "Contingency": "Contingencia",
    "Contract Bond": "Fianza de Contrato",
    "Curtain Wall": "Muro Cortina",
    "Damp Proofing": "Impermeabilización",
    "Dead Load": "Carga Muerta",
    "Demolition": "Demolición",
    "Design Development": "Desarrollo de Diseño",
    "Detail Drawing": "Dibujo Detallado",
    "Drilled Pier": "Pilote Perforado",
    "Drywall": "Tablaroca",
    "Earthmoving": "Movimiento de Tierras",
    "Elevation": "Elevación",
    "Emergency Egress": "Salida de Emergencia",
    "Energy Modeling": "Modelado Energético",
    "Excavation": "Excavación",
    "Expansion Joint": "Junta de Expansión",
    "Exterior Finish": "Acabado Exterior",
    "Fasteners": "Fijaciones",
    "Field Verification": "Verificación en Campo",
    "Final Walkthrough": "Inspección Final",
    "Finishing": "Acabado",
    "Fire Barrier": "Barrera Contra Incendios",
    "Flange": "Brida",
    "Floor Slab": "Losa de Piso",
    "Footing": "Zapata",
    "Foundation Wall": "Muro de Cimentación",
    "Framing": "Estructura",
    "Frost Line": "Línea de Heladas",
    "Grade Beam": "Viga de Cimentación",
    "Grading": "Nivelación",
    "Ground Penetrating Radar": "Radar de Penetración Terrestre",
    "Hardscape": "Paisajismo Duro",
    "HVAC Ductwork": "Conductos HVAC",
    "I-Beam": "Viga en I",
    "Insulation Rating": "Clasificación de Aislamiento",
    "Interior Finish": "Acabado Interior",
    "Joist": "Vigueta",
    "Keyway": "Entalladura",
    "Lath": "Listón",
    "Load Testing": "Prueba de Carga",
    "Louver": "Persiana",
    "Masonry": "Albañilería",
    "Moisture Barrier": "Barrera de Humedad",
    "Mortar": "Mortero",
    "Overhang": "Voladizo",
    "Parapet": "Pretil",
    "Pile": "Pilote",
    "Plaster": "Yeso",
    "Plumbing Fixtures": "Accesorios de Plomería",
    "Post-Tension Slab": "Losa Postensada",
    "Prefabricated Components": "Componentes Prefabricados",
    "Safety Harness": "Arnés de Seguridad",
    "Scaffolding": "Andamios",
    "Structural Integrity": "Integridad Estructural",
    "Structural Steel": "Acero Estructural",
    "Temporary Fence": "Cerca Temporal",
    "Zoning Ordinance": "Ordenanza de Zonificación",
    "Thermal Insulation": "Aislamiento Térmico",
    "Sealant": "Sellador",
    "Retaining Wall": "Muro de Contención",
    "Screw Anchor": "Anclaje de Tornillo",
    "Rain Screen": "Pantalla de Lluvia",
    "Facade Treatment": "Tratamiento de Fachada",
    "Fireproofing": "Protección contra Incendios",
}
# Preprocess text to replace glossary terms with placeholders
def preprocess_text(text, glossary):
    """Wrap each glossary term found in *text* in a ``[[term]]`` placeholder.

    Matching is case-insensitive and limited to whole terms: a term is not
    matched when it is directly preceded or followed by a word character.
    The placeholder always carries the glossary key's own spelling, whatever
    the casing in *text*.

    All terms are matched in a single pass, longest first. A term that is
    part of a longer glossary term (e.g. "Beam" inside "Grade Beam") therefore
    never produces a placeholder nested inside another placeholder.

    Args:
        text: English source text.
        glossary: Mapping whose keys are the terms to protect.

    Returns:
        *text* with every matched term replaced by ``[[<glossary key>]]``.
    """
    # Longest terms first so the alternation prefers "Grade Beam" over "Beam".
    terms = [term for term in sorted(glossary, key=len, reverse=True) if term]
    if not terms:
        return text

    # One capture group per term: match.lastindex then identifies the matched
    # glossary key without relying on case-folding the matched text.
    # Lookarounds are used instead of \b because \b cannot match after a term
    # that ends in a non-word character such as ")".
    alternatives = "|".join(f"({re.escape(term)})" for term in terms)
    pattern = re.compile(rf"(?<!\w)(?:{alternatives})(?!\w)", flags=re.IGNORECASE)

    return pattern.sub(lambda match: f"[[{terms[match.lastindex - 1]}]]", text)
# Postprocess text to replace placeholders with glossary terms
def postprocess_text(text, glossary):
    """Swap each ``[[term]]`` placeholder in *text* for its glossary value.

    Placeholders are matched literally (exact spelling and casing of the
    glossary key) and substituted one glossary entry at a time, in the
    glossary's iteration order.

    Args:
        text: Text that may contain ``[[term]]`` placeholders.
        glossary: Mapping of term -> replacement string.

    Returns:
        *text* with every recognised placeholder replaced.
    """
    restored = text
    for source_term in glossary:
        marker = "[[{}]]".format(source_term)
        restored = restored.replace(marker, glossary[source_term])
    return restored
# Translate text
def translate_text(text):
    """Translate English *text* to Spanish, keeping glossary terms fixed.

    Glossary terms are replaced by placeholders before translation and
    swapped for their glossary translations afterwards.

    Args:
        text: English input from the UI.

    Returns:
        The Spanish translation. An empty string when *text* is empty,
        whitespace-only or ``None``. If any step raises, a message starting
        with "An error occurred during translation:" is returned instead, so
        the UI callback itself never raises.
    """
    try:
        # Nothing to translate: skip the model call instead of feeding it
        # an empty prompt.
        if not text or not text.strip():
            return ""

        # Preprocess the text
        preprocessed_text = preprocess_text(text, custom_glossary)
        # Translate the preprocessed text
        translated = translator(preprocessed_text)[0]['translation_text']
        # Postprocess the text
        return postprocess_text(translated, custom_glossary)
    except Exception as e:
        # Broad catch is deliberate: this is the UI boundary and the error
        # text is shown to the user in the output box.
        return f"An error occurred during translation: {e}"
# Gradio interface: a five-line English input box wired to translate_text,
# with a single text box for the Spanish result.
iface = gr.Interface(
    title="English to Spanish Translator",
    description="Enter English text to translate it into Spanish, with accurate translation of construction and project management terms.",
    fn=translate_text,
    inputs=gr.Textbox(lines=5, label="Input Text (English)"),
    outputs=gr.Textbox(label="Translated Text (Spanish)"),
)

# Launch the interface only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()