rayhane committed on
Commit
8b3885d
·
1 Parent(s): 6de4c8a

Fix deployment

Browse files
static/.dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .env
6
+ venv/
static/README.md DELETED
@@ -1,10 +0,0 @@
1
- # Document Translation API
2
-
3
- Cette application permet de traduire des documents (TXT, PDF, DOCX) en plusieurs langues en utilisant FastAPI et les modèles de traduction de Hugging Face.
4
-
5
- ## Comment l'utiliser ?
6
- 1. **Uploader un document** (TXT, PDF, DOCX)
7
- 2. **Choisir la langue source et la langue cible**
8
- 3. **Obtenir la traduction instantanée !**
9
-
10
- Déployé sur Hugging Face Spaces avec Docker. 🚀
 
 
 
 
 
 
 
 
 
 
 
static/README1.md ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: "Document Translation Service"
3
+ emoji: "🌍"
4
+ colorFrom: "blue"
5
+ colorTo: "green"
6
+ sdk: "docker"
7
+ app_file: "app.py"
8
+ pinned: false
9
+ ---
static/app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+
3
+ app = FastAPI()
4
+
5
+ @app.get("/")
6
+ def home():
7
+ return {"message": "Hello, Huging Face!"}
static/main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """from fastapi import FastAPI, File, UploadFile, HTTPException, Form
2
+ from fastapi.responses import HTMLResponse, JSONResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from transformers import pipeline
5
+ import textwrap
6
+ import fitz # PyMuPDF for PDF
7
+ from docx import Document
8
+ import openpyxl # For Excel
9
+ from pptx import Presentation
10
+ import os
11
+
12
+ app = FastAPI()
13
+
14
+ # Serve static files (like index.html)
15
+ app.mount("/static", StaticFiles(directory="static"), name="static")
16
+
17
+ @app.get("/", response_class=HTMLResponse)
18
+ async def read_root():
19
+ with open("static/index.html", "r") as file:
20
+ html_content = file.read()
21
+ return HTMLResponse(content=html_content)
22
+
23
+ # Language codes mapping
24
+ LANGUAGE_CODES = {
25
+ "Anglais": "en",
26
+ "Français": "fr",
27
+ "Arabe": "ar",
28
+ "Espagnol": "es",
29
+ "Allemand": "de",
30
+ "Italien": "it",
31
+ "Portugais": "pt",
32
+ "Néerlandais": "nl"
33
+ }
34
+
35
+ # Function to load translation model for dynamic language pairs
36
+ def load_translator(src_lang: str, tgt_lang: str):
37
+ src_code = LANGUAGE_CODES.get(src_lang)
38
+ tgt_code = LANGUAGE_CODES.get(tgt_lang)
39
+
40
+ if not src_code or not tgt_code:
41
+ raise ValueError(f"Unsupported language pair: {src_lang} -> {tgt_lang}")
42
+
43
+ try:
44
+ model_name = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
45
+ return pipeline("translation", model=model_name)
46
+ except Exception as e:
47
+ if src_code != "en" and tgt_code != "en":
48
+ model_src_to_en = pipeline("translation", model=f"Helsinki-NLP/opus-mt-{src_code}-en")
49
+ model_en_to_tgt = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{tgt_code}")
50
+ return (model_src_to_en, model_en_to_tgt)
51
+ else:
52
+ raise ValueError(f"No available translation model for {src_lang} -> {tgt_lang}")
53
+
54
+ # Function to split text into manageable chunks
55
+ def chunk_text(text, max_length=400):
56
+ return textwrap.wrap(text, max_length)
57
+
58
+ # Extract text from different file types
59
+ def extract_text(file: UploadFile):
60
+ if file.filename.endswith(".txt"):
61
+ return file.file.read().decode("utf-8")
62
+ elif file.filename.endswith(".pdf"):
63
+ doc = fitz.open(stream=file.file.read(), filetype="pdf")
64
+ return "\n".join([page.get_text() for page in doc])
65
+ elif file.filename.endswith(".docx"):
66
+ doc = Document(file.file)
67
+ return "\n".join([para.text for para in doc.paragraphs])
68
+ elif file.filename.endswith(".xlsx"):
69
+ wb = openpyxl.load_workbook(file.file)
70
+ sheets = wb.sheetnames
71
+ text = ""
72
+ for sheet in sheets:
73
+ ws = wb[sheet]
74
+ for row in ws.iter_rows():
75
+ text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
76
+ return text
77
+ elif file.filename.endswith(".pptx"):
78
+ prs = Presentation(file.file)
79
+ text = ""
80
+ for slide in prs.slides:
81
+ for shape in slide.shapes:
82
+ if hasattr(shape, "text"):
83
+ text += shape.text + "\n"
84
+ return text
85
+ else:
86
+ raise HTTPException(status_code=400, detail="Unsupported file type.")
87
+
88
+ # Upload and translate files
89
+ @app.post("/upload/")
90
+ async def upload_file(file: UploadFile = File(...), src_lang: str = Form(...), tgt_lang: str = Form(...)):
91
+ try:
92
+ text = extract_text(file)
93
+ translators = load_translator(src_lang, tgt_lang)
94
+
95
+ chunks = chunk_text(text)
96
+ if isinstance(translators, tuple):
97
+ translated_chunks = [translators[1](translators[0](chunk, max_length=400)[0]['translation_text'], max_length=400)[0]['translation_text'] for chunk in chunks]
98
+ else:
99
+ translated_chunks = [translators(chunk, max_length=400)[0]['translation_text'] for chunk in chunks]
100
+
101
+ translated_text = " ".join(translated_chunks)
102
+ return JSONResponse(content={"filename": file.filename, "translated_text": translated_text})
103
+ except Exception as e:
104
+ raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
105
+ """
static/requirements1.txt ADDED
Binary file (2.27 kB). View file
 
static/uploads/TD1 (1).docx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80769ffc2bd30394ef3124842a08c2a72dda9c161488c363799f53520895ec2
3
+ size 53350
static/uploads/Untitled.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb03deee8b41e265cde7eb86f14deae4cc505d9fef18a3cdaccde123f7ca6ae8
3
+ size 349803
static/uploads/chapitre 2 RECONNAISSANCE DE FORMES ET CLASSIFICATION.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad0c7e09e566f603390d90611e4fd0c969d6920a064b4cf74fdbcde58fc8c7e
3
+ size 1665388