rayhane123 committed on
Commit
6c95da8
·
verified ·
1 Parent(s): ec71fe6

Upload 10 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ translation/static/uploads/chapitre[[:space:]]2[[:space:]]RECONNAISSANCE[[:space:]]DE[[:space:]]FORMES[[:space:]]ET[[:space:]]CLASSIFICATION.pdf filter=lfs diff=lfs merge=lfs -text
37
+ translation/static/uploads/Untitled.pdf filter=lfs diff=lfs merge=lfs -text
translation/__pycache__/main.cpython-312.pyc ADDED
Binary file (7.41 kB). View file
 
translation/app.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
import sys
import os

# Absolute path of the folder that contains this file.
_HERE = os.path.dirname(os.path.abspath(__file__))

# `from translation.main import app` resolves the top-level name "translation",
# so the directory that CONTAINS that folder must be importable. The original
# entry (a "translation" sub-folder next to this file) is kept for backward
# compatibility.
# NOTE(review): in this commit main.py is listed under translation/static/ —
# confirm that translation/main.py actually exists in the deployed tree.
for _candidate in (
    os.path.dirname(_HERE),
    os.path.join(os.path.dirname(__file__), "translation"),
):
    if _candidate not in sys.path:
        sys.path.append(_candidate)

# Import FastAPI app from main.py
from translation.main import app
translation/requirements.txt ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
fastapi
uvicorn
# FastAPI needs python-multipart to parse Form(...) / File(...) request bodies.
python-multipart
transformers
# Tokenizer backend used by the Helsinki-NLP opus-mt (Marian) checkpoints.
sentencepiece
torch
# textwrap is part of the Python standard library and must not be pip-installed.
pymupdf
python-docx
openpyxl
python-pptx
translation/static/__pycache__/main.cpython-312.pyc ADDED
Binary file (7.41 kB). View file
 
translation/static/__pycache__/main.cpython-39.pyc ADDED
Binary file (318 Bytes). View file
 
translation/static/index.html ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <!DOCTYPE html>
2
+ <html lang="fr">
3
+ <head>
4
+ <meta charset="UTF-8">
5
+ <meta name="viewport" content="width=device-width, initial-scale=1.0">
6
+ <title>Document Translator - IA Edition</title>
7
+ <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.3.0/dist/css/bootstrap.min.css" rel="stylesheet">
8
+ <script src="https://code.jquery.com/jquery-3.6.0.min.js"></script>
9
+ <script src="https://cdn.jsdelivr.net/npm/sweetalert2@11"></script>
10
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/jspdf/2.5.1/jspdf.umd.min.js"></script>
11
+ <style>
12
+ body {
13
+ background: linear-gradient(135deg, #1e3c72, #2a5298);
14
+ color: white;
15
+ text-align: center;
16
+ padding: 40px;
17
+ }
18
+ .container {
19
+ max-width: 600px;
20
+ background: rgba(255, 255, 255, 0.1);
21
+ padding: 20px;
22
+ border-radius: 15px;
23
+ backdrop-filter: blur(10px);
24
+ box-shadow: 0px 0px 15px rgba(255, 255, 255, 0.2);
25
+ }
26
+ .result-box {
27
+ background: white;
28
+ color: black;
29
+ padding: 15px;
30
+ border-radius: 10px;
31
+ text-align: left;
32
+ max-height: 300px;
33
+ overflow-y: auto;
34
+ margin-top: 15px;
35
+ font-size: 16px;
36
+ }
37
+ </style>
38
+ </head>
39
+ <body>
40
+ <div class="container">
41
+ <h1>📝 AI Document Translator</h1>
42
+ <p class="file-info">Formats supportés : <b>TXT, PDF, DOCX, PPTX, XLSX</b></p>
43
+
44
+ <form id="uploadForm">
45
+ <input type="file" id="fileInput" name="file" class="form-control mt-3" required>
46
+
47
+ <select id="src_lang" name="src_lang" class="form-control mt-3">
48
+ <option >Francais</option>
49
+ <option >Anglais</option>
50
+ <option >Arabe</option>
51
+ <option >Espagnol</option>
52
+ </select>
53
+ <select id="tgt_lang" name="tgt_lang" class="form-control mt-3">
54
+ <option >Francais</option>
55
+ <option >Anglais</option>
56
+ <option >Arabe</option>
57
+ <option >Espagnol</option>
58
+ </select>
59
+
60
+ <button type="submit" class="btn btn-success mt-3">Traduire</button>
61
+ </form>
62
+
63
+ <div class="menu mt-3">
64
+ <button type="button" class="btn btn-info" id="openFile">📂 Ouvrir le fichier</button>
65
+ <button id="copyText" class="btn btn-secondary" disabled>📋 Copier</button>
66
+ <button id="speakText" class="btn btn-dark" disabled>🔊 Écouter</button>
67
+ <button id="downloadTxt" class="btn btn-warning" disabled>⬇️ Télécharger en TXT</button>
68
+ <button id="downloadPdf" class="btn btn-danger" disabled>📄 Télécharger en PDF</button>
69
+ <button id="summarizeText" class="btn btn-primary" disabled>✂️ Résumer</button>
70
+ </div>
71
+
72
+ <h2 class="mt-4">Résultat :</h2>
73
+ <div id="result" class="result-box">Aucun texte traduit pour l’instant...</div>
74
+ </div>
75
+
76
+ <script>
$(document).ready(function() {
    // Raw text of the last successful translation. The action buttons work on
    // this value rather than on the result box, whose content also carries the
    // "✨ Traduction :" label (or an error message).
    let translatedText = '';
    const actionButtons = '#copyText, #speakText, #downloadTxt, #downloadPdf, #summarizeText';

    // Turn an error payload into a printable string. `detail` is read as-is
    // when it is a string and JSON-encoded otherwise.
    function describeError(payload) {
        const detail = payload && payload.detail;
        if (!detail) return 'Réponse inattendue du serveur.';
        return typeof detail === 'string' ? detail : JSON.stringify(detail);
    }

    // Show an error in the result box as plain text and disable the actions.
    function showError(message) {
        translatedText = '';
        $(actionButtons).prop("disabled", true);
        $('#result').text(`❌ Erreur: ${message}`);
    }

    $('#uploadForm').submit(async function(event) {
        event.preventDefault();
        let formData = new FormData(this);
        Swal.fire({ title: 'Traduction en cours...', didOpen: () => { Swal.showLoading(); }});

        try {
            let response = await fetch('/upload/', { method: 'POST', body: formData });
            let result = await response.json();
            Swal.close();
            if (response.ok && result.translated_text) {
                translatedText = result.translated_text;
                // The translation comes from an uploaded document, so it is
                // inserted as a text node and never parsed as HTML.
                $('#result')
                    .empty()
                    .css('white-space', 'pre-wrap')
                    .append($('<b>').text('✨ Traduction :'))
                    .append('<br>')
                    .append(document.createTextNode(translatedText));
                $(actionButtons).prop("disabled", false);
            } else {
                showError(describeError(result));
            }
        } catch (err) {
            // Network failure or non-JSON body: close the loading modal
            // instead of leaving it spinning forever.
            Swal.close();
            showError(err.message);
        }
    });

    // Forward the click to the native file picker of the upload form.
    $('#openFile').click(function() {
        document.getElementById('fileInput').click();
    });

    $('#copyText').click(async function() {
        if (!translatedText) return;
        try {
            await navigator.clipboard.writeText(translatedText);
            Swal.fire({ title: 'Copié', icon: 'success', timer: 1200, showConfirmButton: false });
        } catch (err) {
            Swal.fire({ title: 'Erreur', text: 'Impossible de copier le texte.', icon: 'error' });
        }
    });

    $('#speakText').click(function() {
        if (!translatedText) return;
        let langMap = { "Francais": "fr-FR", "Anglais": "en-US", "Arabe": "ar-SA", "Espagnol": "es-ES" };
        let lang = $('#tgt_lang').val();
        let utterance = new SpeechSynthesisUtterance(translatedText);
        utterance.lang = langMap[lang] || "fr-FR";
        utterance.rate = 1;
        utterance.pitch = 1;
        utterance.volume = 1;
        speechSynthesis.speak(utterance);
    });

    $('#downloadTxt').click(function() {
        if (!translatedText) return;
        const url = URL.createObjectURL(new Blob([translatedText], { type: 'text/plain;charset=utf-8' }));
        const link = document.createElement('a');
        link.href = url;
        link.download = 'traduction.txt';
        document.body.appendChild(link);
        link.click();
        link.remove();
        URL.revokeObjectURL(url);
    });

    $('#downloadPdf').click(function() {
        if (!translatedText) return;
        // NOTE(review): jsPDF's built-in fonts presumably lack Arabic glyphs —
        // confirm and embed a font if Arabic PDF output is required.
        const { jsPDF } = window.jspdf;
        const pdf = new jsPDF();
        const margin = 10;
        const lineHeight = 7;
        const pageHeight = pdf.internal.pageSize.getHeight();
        const usableWidth = pdf.internal.pageSize.getWidth() - 2 * margin;
        let y = margin + lineHeight;
        pdf.splitTextToSize(translatedText, usableWidth).forEach(function(line) {
            // Start a new page once the next line would cross the bottom margin.
            if (y > pageHeight - margin) {
                pdf.addPage();
                y = margin + lineHeight;
            }
            pdf.text(line, margin, y);
            y += lineHeight;
        });
        pdf.save('traduction.pdf');
    });

    $('#summarizeText').click(async function() {
        if (!translatedText) return;
        Swal.fire({ title: 'Résumé en cours...', didOpen: () => { Swal.showLoading(); }});

        try {
            let response = await fetch('/summarize/', {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify({ text: translatedText })
            });
            let result = await response.json();
            Swal.close();
            if (response.ok && result.summary) {
                // `text` (not the positional html argument) keeps the summary
                // from being interpreted as markup.
                Swal.fire({ title: "Résumé", text: result.summary, icon: "info" });
            } else {
                Swal.fire("Erreur", "Impossible de résumer le texte.", "error");
            }
        } catch (err) {
            Swal.close();
            Swal.fire("Erreur", "Impossible de résumer le texte.", "error");
        }
    });
});
126
+ </script>
127
+ </body>
128
+ </html>
translation/static/main.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, File, UploadFile, HTTPException, Form
2
+ from fastapi.responses import HTMLResponse
3
+ from fastapi.staticfiles import StaticFiles
4
+ from transformers import pipeline
5
+ import textwrap
6
+ import fitz # PyMuPDF for PDF handling
7
+ from docx import Document
8
+ import openpyxl # For Excel
9
+ from pptx import Presentation
10
+ from functools import lru_cache
11
+ import os
12
+
# Initialize FastAPI app
app = FastAPI()

# Locate the static folder relative to this file instead of a machine-specific
# absolute path, so the service starts on any host. If this module already
# lives in a folder named "static", that folder is used; otherwise a "static"
# sub-folder next to this file is used. The TRANSLATION_STATIC_DIR environment
# variable overrides both.
_BASE_DIR = os.path.dirname(os.path.abspath(__file__))
STATIC_DIR = os.environ.get("TRANSLATION_STATIC_DIR") or (
    _BASE_DIR
    if os.path.basename(_BASE_DIR) == "static"
    else os.path.join(_BASE_DIR, "static")
)

# Ensure the static directory exists (exist_ok avoids a check-then-create race)
os.makedirs(STATIC_DIR, exist_ok=True)

# Mount static files (serves index.html)
# NOTE(review): every file under STATIC_DIR becomes downloadable via /static —
# confirm no source files or user uploads are kept in that folder.
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
25
+
@app.get("/", response_class=HTMLResponse)
async def read_root():
    """Return the contents of ``index.html`` from the static folder.

    Raises:
        HTTPException: 404 when ``index.html`` is missing from ``STATIC_DIR``.
    """
    html_file = os.path.join(STATIC_DIR, "index.html")
    try:
        with open(html_file, "r", encoding="utf-8") as handle:
            page = handle.read()
    except FileNotFoundError:
        raise HTTPException(status_code=404, detail="index.html not found in static folder.")
    return HTMLResponse(content=page)
34
+
# Form language labels -> ISO 639-1 codes
LANGUAGE_CODES = dict(
    Anglais="en",
    Francais="fr",
    Arabe="ar",
    Espagnol="es",
)

# Direct translation checkpoints, keyed by "<source>-<target>" language codes.
# Every listed pair has English on one side.
_MODEL_PAIRS = ("fr-en", "en-fr", "ar-en", "en-ar", "es-en", "en-es")
AVAILABLE_MODELS = {pair: f"Helsinki-NLP/opus-mt-{pair}" for pair in _MODEL_PAIRS}
52
+
# Cache model loading
@lru_cache(maxsize=10)
def load_translator(src_code: str, tgt_code: str):
    """Return the translation pipeline(s) for a language pair.

    Args:
        src_code: Source language code, matched against ``AVAILABLE_MODELS`` keys.
        tgt_code: Target language code, matched against ``AVAILABLE_MODELS`` keys.

    Returns:
        A single pipeline when a direct ``"<src>-<tgt>"`` model is listed in
        ``AVAILABLE_MODELS``; otherwise a ``(src->en, en->tgt)`` tuple of
        pipelines for pivot translation through English.

    Raises:
        ValueError: If neither a direct model nor both pivot legs are available.
    """
    model_key = f"{src_code}-{tgt_code}"
    if model_key in AVAILABLE_MODELS:
        return pipeline("translation", model=AVAILABLE_MODELS[model_key])

    first_leg = f"{src_code}-en"
    second_leg = f"en-{tgt_code}"
    can_pivot = (
        src_code != "en"
        and tgt_code != "en"
        # Both legs must be listed: passing model=None to pipeline() would
        # silently load a default model for the wrong language pair.
        and first_leg in AVAILABLE_MODELS
        and second_leg in AVAILABLE_MODELS
    )
    if can_pivot:
        # Go through the cached entry point so a leg already loaded for a
        # direct request is reused instead of loaded a second time.
        return (
            load_translator(src_code, "en"),
            load_translator("en", tgt_code),
        )

    raise ValueError(f"No model available for {src_code} -> {tgt_code}")
69
+
# Split text into chunks
def chunk_text(text, max_length=400):
    """Wrap *text* into a list of lines of at most *max_length* characters.

    Uses ``textwrap`` with its default settings, so runs of whitespace
    (including newlines) are collapsed and over-long words are broken.
    """
    wrapper = textwrap.TextWrapper(width=max_length)
    return wrapper.wrap(text)
73
+
# Extract text based on file type
def extract_text(file: UploadFile):
    """Extract plain text from an uploaded TXT, PDF, DOCX, XLSX or PPTX file.

    The format is chosen from the file name's extension (case-insensitive).

    Args:
        file: The uploaded file; its ``filename`` and ``file`` stream are read.

    Returns:
        The extracted text as a single string.

    Raises:
        HTTPException: 400 when the extension is not supported, 500 when
            reading or parsing the file fails.
    """
    # filename may be missing; lower-casing lets "REPORT.PDF" match ".pdf".
    filename = (file.filename or "").lower()

    try:
        if filename.endswith(".txt"):
            return file.file.read().decode("utf-8")

        if filename.endswith(".pdf"):
            # The context manager closes the document once the pages are read.
            with fitz.open(stream=file.file.read(), filetype="pdf") as doc:
                return "\n".join(page.get_text() for page in doc)

        if filename.endswith(".docx"):
            doc = Document(file.file)
            return "\n".join(para.text for para in doc.paragraphs)

        if filename.endswith(".xlsx"):
            wb = openpyxl.load_workbook(file.file)
            rows = []
            for sheet in wb.sheetnames:
                for row in wb[sheet].iter_rows():
                    # Only empty cells (None) become ""; 0 and False are kept.
                    rows.append(
                        "\t".join(
                            "" if cell.value is None else str(cell.value)
                            for cell in row
                        )
                    )
            return "".join(f"{line}\n" for line in rows)

        if filename.endswith(".pptx"):
            prs = Presentation(file.file)
            texts = [
                shape.text
                for slide in prs.slides
                for shape in slide.shapes
                if hasattr(shape, "text")
            ]
            return "".join(f"{item}\n" for item in texts)

        raise HTTPException(status_code=400, detail="File type not supported.")

    except HTTPException:
        # Keep the deliberate 400 above from being rewrapped as a 500 below.
        raise
    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Error extracting text: {str(e)}") from e
111
+
@app.post("/upload/")
async def upload_file(
    file: UploadFile = File(...),
    src_lang: str = Form(...),
    tgt_lang: str = Form(...)
):
    """Translate the text of an uploaded document.

    Args:
        file: Uploaded document, passed to ``extract_text``.
        src_lang: Source language label; must be a key of ``LANGUAGE_CODES``.
        tgt_lang: Target language label; must be a key of ``LANGUAGE_CODES``.

    Returns:
        ``{"translated_text": <str>}``.

    Raises:
        HTTPException: 400 for an unknown language or a file with no text,
            500 when loading a model or translating fails. Errors raised by
            ``extract_text`` propagate unchanged.
    """
    # Validate the cheap form fields before parsing the document.
    src_code = LANGUAGE_CODES.get(src_lang)
    tgt_code = LANGUAGE_CODES.get(tgt_lang)

    if not src_code or not tgt_code:
        raise HTTPException(status_code=400, detail=f"Unsupported language: {src_lang} -> {tgt_lang}")

    text = extract_text(file)

    if not text.strip():
        raise HTTPException(status_code=400, detail="No text extracted from the file.")

    # Nothing to translate: avoid a lossy round trip through English (or a
    # "no model available" failure for en -> en).
    if src_code == tgt_code:
        return {"translated_text": text}

    def _translate_chunks(translator, source_text):
        # Translate chunk by chunk and join the results with newlines.
        return "\n".join(
            translator(chunk)[0]['translation_text']
            for chunk in chunk_text(source_text)
        )

    try:
        # Load translation model
        translator = load_translator(src_code, tgt_code)

        # If indirect translation via English
        if isinstance(translator, tuple):
            to_english, from_english = translator
            intermediate_text = _translate_chunks(to_english, text)
            translated_text = _translate_chunks(from_english, intermediate_text)
        else:
            translated_text = _translate_chunks(translator, text)

        return {"translated_text": translated_text}

    except Exception as e:
        raise HTTPException(status_code=500, detail=f"Internal error: {str(e)}") from e
translation/static/uploads/TD1 (1).docx ADDED
Binary file (53.4 kB). View file
 
translation/static/uploads/Untitled.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb03deee8b41e265cde7eb86f14deae4cc505d9fef18a3cdaccde123f7ca6ae8
3
+ size 349803
translation/static/uploads/chapitre 2 RECONNAISSANCE DE FORMES ET CLASSIFICATION.pdf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cad0c7e09e566f603390d90611e4fd0c969d6920a064b4cf74fdbcde58fc8c7e
3
+ size 1665388