rayhane committed on
Commit
6de4c8a
·
0 Parent(s):

Initial commit

Browse files
.dockerignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ __pycache__/
2
+ *.pyc
3
+ *.pyo
4
+ *.pyd
5
+ .env
6
+ venv/
.gitattributes ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ *.pdf filter=lfs diff=lfs merge=lfs -text
2
+ *.xlsx filter=lfs diff=lfs merge=lfs -text
3
+ *.docx filter=lfs diff=lfs merge=lfs -text
4
+ *.pptx filter=lfs diff=lfs merge=lfs -text
__pycache__/main.cpython-312.pyc ADDED
Binary file (6.19 kB). View file
 
app.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+
3
+ app = FastAPI()
4
+
5
+ @app.get("/")
6
+ def home():
7
+ return {"message": "Hello, Hugging Face!"}
document-translation ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit de121a8cd759d996b71d7c28bff6504a66162227
main.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """from fastapi import FastAPI, File, UploadFile, HTTPException, Form
2
+ from fastapi.responses import HTMLResponse, JSONResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from transformers import pipeline
5
+ import textwrap
6
+ import fitz # PyMuPDF for PDF
7
+ from docx import Document
8
+ import openpyxl # For Excel
9
+ from pptx import Presentation
10
+ import os
11
+
12
+ app = FastAPI()
13
+
14
+ # Serve static files (like index.html)
15
+ app.mount("/static", StaticFiles(directory="static"), name="static")
16
+
17
+ @app.get("/", response_class=HTMLResponse)
18
+ async def read_root():
19
+ with open("static/index.html", "r") as file:
20
+ html_content = file.read()
21
+ return HTMLResponse(content=html_content)
22
+
23
+ # Language codes mapping
24
+ LANGUAGE_CODES = {
25
+ "Anglais": "en",
26
+ "Français": "fr",
27
+ "Arabe": "ar",
28
+ "Espagnol": "es",
29
+ "Allemand": "de",
30
+ "Italien": "it",
31
+ "Portugais": "pt",
32
+ "Néerlandais": "nl"
33
+ }
34
+
35
+ # Function to load translation model for dynamic language pairs
36
+ def load_translator(src_lang: str, tgt_lang: str):
37
+ src_code = LANGUAGE_CODES.get(src_lang)
38
+ tgt_code = LANGUAGE_CODES.get(tgt_lang)
39
+
40
+ if not src_code or not tgt_code:
41
+ raise ValueError(f"Unsupported language pair: {src_lang} -> {tgt_lang}")
42
+
43
+ try:
44
+ model_name = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
45
+ return pipeline("translation", model=model_name)
46
+ except Exception as e:
47
+ if src_code != "en" and tgt_code != "en":
48
+ model_src_to_en = pipeline("translation", model=f"Helsinki-NLP/opus-mt-{src_code}-en")
49
+ model_en_to_tgt = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{tgt_code}")
50
+ return (model_src_to_en, model_en_to_tgt)
51
+ else:
52
+ raise ValueError(f"No available translation model for {src_lang} -> {tgt_lang}")
53
+
54
+ # Function to split text into manageable chunks
55
+ def chunk_text(text, max_length=400):
56
+ return textwrap.wrap(text, max_length)
57
+
58
+ # Extract text from different file types
59
+ def extract_text(file: UploadFile):
60
+ if file.filename.endswith(".txt"):
61
+ return file.file.read().decode("utf-8")
62
+ elif file.filename.endswith(".pdf"):
63
+ doc = fitz.open(stream=file.file.read(), filetype="pdf")
64
+ return "\n".join([page.get_text() for page in doc])
65
+ elif file.filename.endswith(".docx"):
66
+ doc = Document(file.file)
67
+ return "\n".join([para.text for para in doc.paragraphs])
68
+ elif file.filename.endswith(".xlsx"):
69
+ wb = openpyxl.load_workbook(file.file)
70
+ sheets = wb.sheetnames
71
+ text = ""
72
+ for sheet in sheets:
73
+ ws = wb[sheet]
74
+ for row in ws.iter_rows():
75
+ text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
76
+ return text
77
+ elif file.filename.endswith(".pptx"):
78
+ prs = Presentation(file.file)
79
+ text = ""
80
+ for slide in prs.slides:
81
+ for shape in slide.shapes:
82
+ if hasattr(shape, "text"):
83
+ text += shape.text + "\n"
84
+ return text
85
+ else:
86
+ raise HTTPException(status_code=400, detail="Unsupported file type.")
87
+
88
+ # Upload and translate files
89
+ @app.post("/upload/")
90
+ async def upload_file(file: UploadFile = File(...), src_lang: str = Form(...), tgt_lang: str = Form(...)):
91
+ try:
92
+ text = extract_text(file)
93
+ translators = load_translator(src_lang, tgt_lang)
94
+
95
+ chunks = chunk_text(text)
96
+ if isinstance(translators, tuple):
97
+ translated_chunks = [translators[1](translators[0](chunk, max_length=400)[0]['translation_text'], max_length=400)[0]['translation_text'] for chunk in chunks]
98
+ else:
99
+ translated_chunks = [translators(chunk, max_length=400)[0]['translation_text'] for chunk in chunks]
100
+
101
+ translated_text = " ".join(translated_chunks)
102
+ return JSONResponse(content={"filename": file.filename, "translated_text": translated_text})
103
+ except Exception as e:
104
+ raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
105
+ """
requirements.txt ADDED
Binary file (2.27 kB). View file
 
static/Dockerfile ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Utilisation de l'image Python comme base
2
+ FROM python:3.9
3
+
4
+ # Définition du répertoire de travail
5
+ WORKDIR /app
6
+
7
+ # Copier les fichiers du projet dans l'image Docker
8
+ COPY . .
9
+
10
+ # Installer les dépendances
11
+ RUN pip install --no-cache-dir -r requirements.txt
12
+
13
+ # Exposer le port utilisé par FastAPI
14
+ EXPOSE 7860
15
+
16
+ # Lancer l'application avec Uvicorn
17
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
18
+
19
+
20
+ ENV C:\Program Files\Docker\Docker
static/README.md ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # Document Translation API
2
+
3
+ Cette application permet de traduire des documents (TXT, PDF, DOCX) en plusieurs langues en utilisant FastAPI et les modèles de traduction de Hugging Face.
4
+
5
+ ## Comment l'utiliser ?
6
+ 1. **Uploader un document** (TXT, PDF, DOCX)
7
+ 2. **Choisir la langue source et la langue cible**
8
+ 3. **Obtenir la traduction instantanée !**
9
+
10
+ Déployé sur Hugging Face Spaces avec Docker. 🚀
static/index.html ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ <!DOCTYPE html>
3
+ <html lang="en">
4
+ <head>
5
+ <meta charset="UTF-8">
6
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
7
+ <title>Document Translation Service</title>
8
+ <style>
9
+ body {
10
+ font-family: Arial, sans-serif;
11
+ background-color: #f4f4f4;
12
+ display: flex;
13
+ justify-content: center;
14
+ align-items: center;
15
+ height: 100vh;
16
+ margin: 0;
17
+ }
18
+ .container {
19
+ background: white;
20
+ padding: 2rem;
21
+ border-radius: 8px;
22
+ box-shadow: 0 4px 10px rgba(0,0,0,0.1);
23
+ width: 100%;
24
+ max-width: 500px;
25
+ }
26
+ h1 {
27
+ font-size: 1.8rem;
28
+ margin-bottom: 1rem;
29
+ text-align: center;
30
+ color: #333;
31
+ }
32
+ label {
33
+ display: block;
34
+ margin-bottom: 0.5rem;
35
+ font-weight: bold;
36
+ }
37
+ input, select, button {
38
+ width: 100%;
39
+ padding: 0.75rem;
40
+ margin-bottom: 1rem;
41
+ border: 1px solid #ddd;
42
+ border-radius: 4px;
43
+ }
44
+ button {
45
+ background-color: #4caf50;
46
+ color: white;
47
+ font-size: 1rem;
48
+ cursor: pointer;
49
+ border: none;
50
+ }
51
+ button:hover {
52
+ background-color: #45a049;
53
+ }
54
+ #result {
55
+ background: #f4f4f4;
56
+ padding: 1rem;
57
+ border: 1px solid #ddd;
58
+ border-radius: 4px;
59
+ white-space: pre-wrap;
60
+ min-height: 100px;
61
+ }
62
+ .error {
63
+ color: red;
64
+ font-weight: bold;
65
+ }
66
+ </style>
67
+ </head>
68
+ <body>
69
+ <div class="container">
70
+ <h1>Document Translator</h1>
71
+ <form id="uploadForm" enctype="multipart/form-data">
72
+ <label for="file">Choose a file to translate (TXT, PDF, DOCX, PPTX, XLSX):</label>
73
+ <input type="file" id="file" name="file" accept=".txt,.pdf,.docx,.pptx,.xlsx" required>
74
+ <label for="src_lang">Source Language:</label>
75
+ <select id="src_lang" name="src_lang">
76
+ <option>Anglais</option>
77
+ <option>Français</option>
78
+ <option>Arabe</option>
79
+ <option>Espagnol</option>
80
+ <option>Allemand</option>
81
+ <option>Italien</option>
82
+ <option>Portugais</option>
83
+ <option>Néerlandais</option>
84
+ </select>
85
+ <label for="tgt_lang">Target Language:</label>
86
+ <select id="tgt_lang" name="tgt_lang">
87
+ <option>Anglais</option>
88
+ <option>Français</option>
89
+ <option>Arabe</option>
90
+ <option>Espagnol</option>
91
+ <option>Allemand</option>
92
+ <option>Italien</option>
93
+ <option>Portugais</option>
94
+ <option>Néerlandais</option>
95
+ </select>
96
+ <button type="submit">Translate</button>
97
+ </form>
98
+ <h2>Translation Result:</h2>
99
+ <pre id="result"></pre>
100
+ </div>
101
+ <script>
102
+ document.getElementById('uploadForm').onsubmit = async function(event) {
103
+ event.preventDefault();
104
+ const formData = new FormData(this);
105
+ const response = await fetch('/upload/', {
106
+ method: 'POST',
107
+ body: formData
108
+ });
109
+ const result = await response.json();
110
+ if (result.translated_text) {
111
+ document.getElementById('result').textContent = result.translated_text;
112
+ } else {
113
+ document.getElementById('result').textContent = 'Error: ' + result.detail;
114
+ document.getElementById('result').classList.add('error');
115
+ }
116
+ }
117
+ </script>
118
+ </body>
119
+ </html>
120
+
static/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ transformers
4
+ torch
5
+ textwrap3
6
+ pymupdf
7
+ python-docx
8
+ openpyxl
9
+ pillow
uploads/TD1 (1).docx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a80769ffc2bd30394ef3124842a08c2a72dda9c161488c363799f53520895ec2
3
+ size 53350
uploads/Untitled.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb03deee8b41e265cde7eb86f14deae4cc505d9fef18a3cdaccde123f7ca6ae8
3
+ size 349803
uploads/chapitre 2 RECONNAISSANCE DE FORMES ET CLASSIFICATION.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad0c7e09e566f603390d90611e4fd0c969d6920a064b4cf74fdbcde58fc8c7e
3
+ size 1665388