Spaces:

KIMOSSINO
/

TRANSCRIPTIONV4

Sleeping

File size: 8,128 Bytes

f79c89e
b0e29e7
36abef4
70d5a19
b0e29e7
d5f394e
 
f79c89e
36abef4
70d5a19
36abef4
d5f394e
f79c89e
 
 
d5f394e
b0e29e7
 
 
 
f79c89e
d5f394e
36abef4
 
 
 
d5f394e
 
70d5a19
 
36abef4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70d5a19
 
36abef4
 
d5f394e
36abef4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f79c89e
d5f394e
36abef4
d5f394e
f79c89e
b0e29e7
d5f394e
 
618c56b
d5f394e
36abef4
b0e29e7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d5f394e
 
 
36abef4
 
70d5a19
36abef4
70d5a19
 
 
36abef4
 
 
 
 
 
 
70d5a19
36abef4
70d5a19
36abef4
 
70d5a19
36abef4
 
 
 
70d5a19
 
 
 
36abef4
 
 
 
f79c89e
 
 
 
36abef4
618c56b
36abef4
 
 
 
f4ac0b9
 
 
36abef4
f4ac0b9
f79c89e
 
 
 
 
36abef4
 
f79c89e
 
 
 
618c56b
f79c89e
 
618c56b
 
f4ac0b9
 
 
 
 
 
 
 
 
 
f79c89e
 
 
 
 
 
 
618c56b
70d5a19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36abef4
70d5a19
 
 
d5f394e
f79c89e
70d5a19

import os
import json
import asyncio
import tempfile
import requests
import gradio as gr
import whisper
import torch
import edge_tts
from pathlib import Path
from moviepy.editor import VideoFileClip

# Model initialisation.
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
# Pass DEVICE explicitly so this constant is what actually decides where the
# Whisper "base" model is loaded (previously it was computed but never used).
whisper_model = whisper.load_model("base", device=DEVICE)

# Gemini API configuration.
# SECURITY: the API key must never be committed to source control. It is read
# from the GEMINI_API_KEY environment variable (e.g. a Space/CI secret) and
# defaults to an empty string when unset, in which case the API rejects calls.
# NOTE(review): the key that was previously hard-coded here is public and
# should be revoked and rotated.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY", "")
GEMINI_API_URL = "https://generativelanguage.googleapis.com/v1beta/models/gemini-1.5-flash-latest:generateContent"

# Supported languages: short language code -> display name and locale code.
SUPPORTED_LANGUAGES = {
    short_code: {"name": display_name, "code": locale_code}
    for short_code, display_name, locale_code in (
        ("ar", "العربية", "ar-SA"),
        ("en", "English", "en-US"),
        ("fr", "Français", "fr-FR"),
        ("es", "Español", "es-ES"),
    )
}

# Voice types: Arabic UI label (man / woman / child) -> language code -> Edge TTS voice name.
VOICE_TYPES = {
    "رجل": dict(
        ar="ar-SA-HamedNeural",
        en="en-US-ChristopherNeural",
        fr="fr-FR-HenriNeural",
        es="es-ES-AlvaroNeural",
    ),
    "امرأة": dict(
        ar="ar-SA-ZariyahNeural",
        en="en-US-JennyNeural",
        fr="fr-FR-DeniseNeural",
        es="es-ES-ElviraNeural",
    ),
}
# The child entry reuses the woman's voices (as an independent copy); the
# pitch is adjusted for it when the speech is synthesised.
VOICE_TYPES["طفل"] = dict(VOICE_TYPES["امرأة"])

def extract_audio_from_video(video_path):
    """Extract the audio track of a video into a temporary MP3 file.

    Args:
        video_path: Path of the video file to read.

    Returns:
        Path of the newly created temporary ``.mp3`` file. The caller is
        responsible for deleting it.

    Raises:
        Exception: If the video cannot be opened, has no audio track, or the
            audio cannot be written. The original error is chained as the
            cause and its text is appended to the message.
    """
    temp_audio_path = None
    try:
        video = VideoFileClip(video_path)
        try:
            if video.audio is None:
                # Fail with a clear message instead of an AttributeError on None
                raise ValueError("الفيديو لا يحتوي على مسار صوتي")
            # mkstemp creates the file atomically; mktemp only returns a name
            # and is deprecated because of the resulting race condition.
            fd, temp_audio_path = tempfile.mkstemp(suffix=".mp3")
            os.close(fd)
            video.audio.write_audiofile(temp_audio_path, codec='mp3')
        finally:
            # Always release the clip, even when writing the audio fails
            video.close()
        return temp_audio_path
    except Exception as e:
        # Do not leave a partial temp file behind on failure
        if temp_audio_path is not None and os.path.exists(temp_audio_path):
            os.remove(temp_audio_path)
        raise Exception(f"خطأ في استخراج الصوت من الفيديو: {str(e)}") from e

def process_media_file(file_path, source_lang):
    """Transcribe an audio or video file to text with Whisper.

    Video files (.mp4, .avi, .mov, .mkv, matched case-insensitively) first
    have their audio extracted to a temporary file, which is always removed
    afterwards; any other file is passed to Whisper directly.

    Args:
        file_path: Path of the uploaded media file. An object exposing the
            path through a ``name`` attribute is also accepted
            (presumably what some Gradio versions pass for ``gr.File`` —
            verify against the installed version).
        source_lang: Language code of the spoken content, forwarded to
            ``whisper_model.transcribe``.

    Returns:
        The transcribed text, or an Arabic error message prefixed with
        "خطأ في معالجة الملف" when processing fails.
    """
    video_extensions = ('.mp4', '.avi', '.mov', '.mkv')
    extracted_audio_path = None
    try:
        if file_path is None:
            raise ValueError("لم يتم تحديد أي ملف")

        if isinstance(file_path, (str, os.PathLike)):
            media_path = os.fspath(file_path)
        else:
            # File-like upload object: the path on disk is in ``.name``
            media_path = str(getattr(file_path, "name", file_path))

        if media_path.lower().endswith(video_extensions):
            # Video: extract its audio track to a temporary file first
            extracted_audio_path = extract_audio_from_video(media_path)
            audio_path = extracted_audio_path
        else:
            # Audio: use the file as-is
            audio_path = media_path

        # Speech-to-text
        result = whisper_model.transcribe(audio_path, language=source_lang)
        return result["text"]
    except Exception as e:
        return f"خطأ في معالجة الملف: {str(e)}"
    finally:
        # Remove the extracted audio even when transcription failed
        if extracted_audio_path is not None:
            try:
                os.remove(extracted_audio_path)
            except OSError:
                # Best-effort cleanup; must not mask the transcription result
                pass

def translate_text(text, source_lang, target_lang):
    """Translate text between two supported languages using the Gemini API.

    Args:
        text: Text to translate.
        source_lang: Key of ``SUPPORTED_LANGUAGES`` for the input language.
        target_lang: Key of ``SUPPORTED_LANGUAGES`` for the output language.

    Returns:
        ``text`` unchanged when both languages are equal, an empty string
        when there is nothing to translate, the translation on success, or
        an Arabic error message prefixed with "خطأ في الترجمة" when the
        request or the response parsing fails.
    """
    if source_lang == target_lang:
        return text

    # Nothing to translate: skip the network round-trip entirely
    if text is None or not str(text).strip():
        return ""

    try:
        prompt = f"Translate the following text from {SUPPORTED_LANGUAGES[source_lang]['name']} to {SUPPORTED_LANGUAGES[target_lang]['name']}. Only provide the translation without any additional text or explanation:\n\n{text}"

        payload = {
            "contents": [{
                "parts": [{
                    "text": prompt
                }]
            }]
        }

        response = requests.post(
            GEMINI_API_URL,
            headers={
                "Content-Type": "application/json",
                # Send the key as a header rather than in the query string so
                # it cannot end up in exception messages that embed the URL
                # (those messages are returned to the user below).
                "x-goog-api-key": GEMINI_API_KEY,
            },
            json=payload,
            # Without a timeout a stalled connection would block forever
            timeout=60,
        )

        if response.status_code != 200:
            return f"خطأ في الترجمة: {response.status_code} - {response.text}"

        result = response.json()
        return result['candidates'][0]['content']['parts'][0]['text']

    except Exception as e:
        return f"خطأ في الترجمة: {str(e)}"

async def text_to_speech(text, language, voice_type):
    """Synthesise text to an MP3 file with Edge TTS.

    Args:
        text: Text to speak.
        language: Language key used to pick the voice from ``VOICE_TYPES``.
        voice_type: Voice-type key of ``VOICE_TYPES``; it also selects the
            speaking rate and pitch offsets.

    Returns:
        Path (as a string) of the generated file inside ``temp_audio``, or
        an Arabic error message prefixed with "خطأ في تحويل النص إلى صوت"
        when anything fails.
    """
    # Prosody offsets per voice type; types not listed use the fallback
    # passed to ``.get`` below.
    rate_by_type = {"طفل": "+15%"}
    pitch_by_type = {"رجل": "+0Hz", "امرأة": "+50Hz"}

    try:
        # Working directory for the generated audio files
        audio_dir = Path("temp_audio")
        audio_dir.mkdir(exist_ok=True)

        # Pick the voice, then its rate and pitch adjustments
        voice_name = VOICE_TYPES[voice_type][language]
        speaking_rate = rate_by_type.get(voice_type, "+0%")
        voice_pitch = pitch_by_type.get(voice_type, "+100Hz")

        # One output file per (voice type, language) pair
        destination = audio_dir / f"output_{voice_type}_{language}.mp3"

        # Synthesise and write the audio to disk
        tts = edge_tts.Communicate(
            text, voice_name, rate=speaking_rate, pitch=voice_pitch
        )
        await tts.save(str(destination))
    except Exception as e:
        return f"خطأ في تحويل النص إلى صوت: {str(e)}"
    else:
        return str(destination)

def text_to_speech_wrapper(text, language, voice_type):
    """Run the async ``text_to_speech`` synchronously for the Gradio handler.

    Args:
        text: Text to speak.
        language: Language key forwarded to ``text_to_speech``.
        voice_type: Voice-type key forwarded to ``text_to_speech``.

    Returns:
        Path of the generated audio file.

    Raises:
        gr.Error: If ``text_to_speech`` did not produce a file. Its error
            message is shown to the user instead of being handed to the
            audio output component, which would treat it as a file path.
    """
    result = asyncio.run(text_to_speech(text, language, voice_type))
    # text_to_speech reports failures by returning a message, not a path
    if not os.path.isfile(result):
        raise gr.Error(result)
    return result

# Build the Gradio interface: one tab per feature (transcription,
# translation, text-to-speech), each wiring a button to its handler.
with gr.Blocks(title="معالج الصوت والترجمة", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# معالج الصوت والترجمة متعدد اللغات")

    # Tab 1: media file -> text
    with gr.Tab("تحويل الوسائط إلى نص"):
        with gr.Row():
            media_input = gr.File(
                label="ملف صوتي أو فيديو",
                # Gradio expects category names ("audio", "video") or file
                # extensions here, not MIME patterns such as "audio/*".
                file_types=["audio", "video"]
            )
            source_lang = gr.Dropdown(
                choices=list(SUPPORTED_LANGUAGES.keys()),
                value="ar",
                label="لغة الملف"
            )

        transcribe_btn = gr.Button("تحويل إلى نص")
        transcribed_text = gr.Textbox(label="النص المستخرج", lines=5)

        transcribe_btn.click(
            fn=process_media_file,
            inputs=[media_input, source_lang],
            outputs=transcribed_text
        )

    # Tab 2: text translation
    with gr.Tab("ترجمة النص"):
        with gr.Row():
            input_text = gr.Textbox(label="النص المراد ترجمته", lines=5)
            translated_text = gr.Textbox(label="النص المترجم", lines=5)

        with gr.Row():
            trans_source_lang = gr.Dropdown(
                choices=list(SUPPORTED_LANGUAGES.keys()),
                value="ar",
                label="اللغة المصدر"
            )
            trans_target_lang = gr.Dropdown(
                choices=list(SUPPORTED_LANGUAGES.keys()),
                value="en",
                label="اللغة الهدف"
            )

        translate_btn = gr.Button("ترجمة")

        translate_btn.click(
            fn=translate_text,
            inputs=[input_text, trans_source_lang, trans_target_lang],
            outputs=translated_text
        )

    # Tab 3: text -> speech
    with gr.Tab("تحويل النص إلى صوت"):
        with gr.Row():
            tts_text = gr.Textbox(label="النص المراد تحويله إلى صوت", lines=5)
            tts_output = gr.Audio(label="الصوت الناتج")

        with gr.Row():
            tts_lang = gr.Dropdown(
                choices=list(SUPPORTED_LANGUAGES.keys()),
                value="ar",
                label="لغة النص"
            )
            voice_type = gr.Radio(
                choices=list(VOICE_TYPES.keys()),
                value="رجل",
                label="نوع الصوت"
            )

        tts_btn = gr.Button("تحويل إلى صوت")

        tts_btn.click(
            fn=text_to_speech_wrapper,
            inputs=[tts_text, tts_lang, voice_type],
            outputs=tts_output
        )

# Run the application
if __name__ == "__main__":
    # Start the Gradio server only when this file is executed as a script
    demo.launch()