Spaces:

Projetweb
/

AiServices

Runtime error

AiServices / translation /static /main.py

ikram

first commit

d0d55cc 4 months ago

4 kB

	"""from fastapi import FastAPI, File, UploadFile, HTTPException, Form
	from fastapi.responses import HTMLResponse, JSONResponse
	from fastapi.staticfiles import StaticFiles
	from transformers import pipeline
	import textwrap
	import fitz # PyMuPDF for PDF
	from docx import Document
	import openpyxl # For Excel
	from pptx import Presentation
	import os

	app = FastAPI()

	# Serve static files (like index.html)
	app.mount("/static", StaticFiles(directory="static"), name="static")

	@app.get("/", response_class=HTMLResponse)
	async def read_root():
	with open("static/index.html", "r") as file:
	html_content = file.read()
	return HTMLResponse(content=html_content)

	# Language codes mapping
	LANGUAGE_CODES = {
	"Anglais": "en",
	"Français": "fr",
	"Arabe": "ar",
	"Espagnol": "es",
	"Allemand": "de",
	"Italien": "it",
	"Portugais": "pt",
	"Néerlandais": "nl"
	}

	# Function to load translation model for dynamic language pairs
	def load_translator(src_lang: str, tgt_lang: str):
	src_code = LANGUAGE_CODES.get(src_lang)
	tgt_code = LANGUAGE_CODES.get(tgt_lang)

	if not src_code or not tgt_code:
	raise ValueError(f"Unsupported language pair: {src_lang} -> {tgt_lang}")

	try:
	model_name = f"Helsinki-NLP/opus-mt-{src_code}-{tgt_code}"
	return pipeline("translation", model=model_name)
	except Exception as e:
	if src_code != "en" and tgt_code != "en":
	model_src_to_en = pipeline("translation", model=f"Helsinki-NLP/opus-mt-{src_code}-en")
	model_en_to_tgt = pipeline("translation", model=f"Helsinki-NLP/opus-mt-en-{tgt_code}")
	return (model_src_to_en, model_en_to_tgt)
	else:
	raise ValueError(f"No available translation model for {src_lang} -> {tgt_lang}")

	# Function to split text into manageable chunks
	def chunk_text(text, max_length=400):
	return textwrap.wrap(text, max_length)

	# Extract text from different file types
	def extract_text(file: UploadFile):
	if file.filename.endswith(".txt"):
	return file.file.read().decode("utf-8")
	elif file.filename.endswith(".pdf"):
	doc = fitz.open(stream=file.file.read(), filetype="pdf")
	return "\n".join([page.get_text() for page in doc])
	elif file.filename.endswith(".docx"):
	doc = Document(file.file)
	return "\n".join([para.text for para in doc.paragraphs])
	elif file.filename.endswith(".xlsx"):
	wb = openpyxl.load_workbook(file.file)
	sheets = wb.sheetnames
	text = ""
	for sheet in sheets:
	ws = wb[sheet]
	for row in ws.iter_rows():
	text += "\t".join([str(cell.value or "") for cell in row]) + "\n"
	return text
	elif file.filename.endswith(".pptx"):
	prs = Presentation(file.file)
	text = ""
	for slide in prs.slides:
	for shape in slide.shapes:
	if hasattr(shape, "text"):
	text += shape.text + "\n"
	return text
	else:
	raise HTTPException(status_code=400, detail="Unsupported file type.")

	# Upload and translate files
	@app.post("/upload/")
	async def upload_file(file: UploadFile = File(...), src_lang: str = Form(...), tgt_lang: str = Form(...)):
	try:
	text = extract_text(file)
	translators = load_translator(src_lang, tgt_lang)

	chunks = chunk_text(text)
	if isinstance(translators, tuple):
	translated_chunks = [translators[1](translators[0](chunk, max_length=400)[0]['translation_text'], max_length=400)[0]['translation_text'] for chunk in chunks]
	else:
	translated_chunks = [translators(chunk, max_length=400)[0]['translation_text'] for chunk in chunks]

	translated_text = " ".join(translated_chunks)
	return JSONResponse(content={"filename": file.filename, "translated_text": translated_text})
	except Exception as e:
	raise HTTPException(status_code=500, detail=f"Translation failed: {str(e)}")
	"""