Spaces: Farhan1572 (Sleeping)

Create app.py
Farhan1572 committed • Commit d4f7d23
1 Parent(s): 476f2b9
app.py
ADDED
@@ -0,0 +1,185 @@
import gradio as gr
from openai import OpenAI
import pandas as pd
import tempfile
import google.generativeai as genai
import os

# API keys are read from the Space's environment variables / secrets.
api_key = os.getenv("OPENAI_API_KEY")
gemni_api_key = os.getenv("GEMNI_API_KEY")

supported_languages = ['English', 'Brazilian Portuguese', 'Latin American Spanish', 'French', 'European Portuguese', 'Castilian Spanish', 'German', 'Italian', 'Czech', 'Danish', 'Dutch', 'Finnish', 'Norwegian', 'Swedish', 'Hungarian', 'Greek', 'Romanian', 'Polish', 'Arabic', 'Urdu']


# OPENAI
client = OpenAI(api_key=api_key)


def translate_text_openai(source_language, target_language, TEXT, max_characters):
    """Translate TEXT with GPT-3.5, asking it to stay within max_characters."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=[
            {"role": "system", "content": "You are a multilingual translator for movie subtitles."},
            {"role": "system", "content": "The number of input characters and output characters should be the same despite the change in language."},
            {"role": "system", "content": f"In the response, the maximum number of characters allowed is {max_characters}."},
            {"role": "system", "content": "You SHOULD NOT SKIP ANY LINE OR ANY INFORMATION."},
            {"role": "system", "content": "The translation should be error proof."},
            {"role": "user", "content": f"""Translate the text from {source_language} to {target_language}:
            \nTEXT: {TEXT}
            NOTE: THE OUTPUT SHOULD BE IN {target_language}.
            \nREMEMBER: the MAXIMUM number of output characters should be {max_characters}.

            END NOTE: THE OUTPUT SHOULD BE IN {target_language}.
            REMEMBER: THE OUTPUT SHOULD BE IN {target_language}.
            On some occasions you answer in a language other than {target_language}, which is wrong.
            """},
        ]
    )
    return response.choices[0].message.content


def translate_text_correct_openai(source_language, target_language, TEXT, max_characters):
    """Ask GPT-3.5 to shorten an over-long translation without changing its language."""
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-0125",
        temperature=0.1,
        messages=[
            {"role": "system", "content": "You reduce the size of sentences."},
            {"role": "system", "content": f"The output should not be more than {max_characters} characters."},
            {"role": "user", "content": f"""
            DO NOT CHANGE THE LANGUAGE.
            Reduce the size of the text to less than {max_characters} characters, even if the meaning changes slightly.
            REMEMBER: THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE.
            \nWrite the sentence in the shortest possible manner.
            \nTEXT: {TEXT}
            REMEMBER: THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE.
            """},
        ]
    )
    return response.choices[0].message.content


# GEMINI
genai.configure(api_key=gemni_api_key)

model = genai.GenerativeModel('gemini-1.5-flash')


def translate_text_gemni(source_language, target_language, TEXT, max_characters):
    """Translate TEXT with Gemini 1.5 Flash, asking it to stay within max_characters."""
    response = model.generate_content(f'''You are a multilingual translator for movie subtitles.
    The number of input characters and output characters should be the same despite the change in language.
    In the response, the maximum number of characters allowed is {max_characters}.
    You SHOULD NOT SKIP ANY LINE OR ANY INFORMATION.
    The translation should be error proof.
    Translate the text from {source_language} to {target_language}:
    \nTEXT: {TEXT}
    NOTE: THE OUTPUT SHOULD BE IN {target_language}.
    \nREMEMBER: the MAXIMUM number of output characters should be {max_characters}.

    END NOTE: THE OUTPUT SHOULD BE IN {target_language}.
    REMEMBER: THE OUTPUT SHOULD BE IN {target_language}.
    On some occasions you answer in a language other than {target_language}, which is wrong.
    ''')
    return response.text


def translate_text_correct_gemni(source_language, target_language, TEXT, max_characters):
    """Ask Gemini to shorten an over-long translation without changing its language."""
    response = model.generate_content(f"""
    You reduce the size of sentences.
    The output should not be more than {max_characters} characters.
    DO NOT CHANGE THE LANGUAGE.
    Reduce the size of the text to less than {max_characters} characters, even if the meaning changes slightly.
    REMEMBER: THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE.
    \nWrite the sentence in the shortest possible manner.
    \nTEXT: {TEXT}
    REMEMBER: THE INPUT AND THE OUTPUT SHOULD BE IN THE SAME LANGUAGE.
    """)
    return response.text


def check_conditon_openai(source_language, target_language, response, max_characters):
    """Recursively shorten the OpenAI translation until it fits within max_characters."""
    length = len(response)

    if length > int(max_characters):
        response = translate_text_correct_openai(source_language, target_language, response, max_characters)
        return check_conditon_openai(source_language, target_language, response, max_characters)
    return response


def check_conditon_gemni(source_language, target_language, response, max_characters):
    """Recursively shorten the Gemini translation until it fits within max_characters."""
    length = len(response)

    if length > int(max_characters):
        response = translate_text_correct_gemni(source_language, target_language, response, max_characters)
        return check_conditon_gemni(source_language, target_language, response, max_characters)
    return response


def get_translation(source_language, target_language, TEXT, max_characters):
    """Translate TEXT with both models, enforce the length limit, and return an Excel file path."""
    response_openai = translate_text_openai(source_language, target_language, TEXT, max_characters)
    response_openai = check_conditon_openai(source_language, target_language, response_openai, max_characters)

    response_gemni = translate_text_gemni(source_language, target_language, TEXT, max_characters)
    response_gemni = check_conditon_gemni(source_language, target_language, response_gemni, max_characters)

    excel_data_path = create_excel(TEXT, response_openai, response_gemni)
    return excel_data_path


def create_excel(TEXT, response_openai, response_gemni):
    """Write the original text and both translations to a temporary .xlsx file and return its path."""
    # Create a single-row DataFrame from the input data.
    df = pd.DataFrame({"Original Text": TEXT, "OpenAI Translated": response_openai, "Gemini Translated": response_gemni}, index=[1])

    # Create a temporary file to store the Excel data.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.xlsx') as temp_file:
        # Write the DataFrame to the temporary file as an Excel workbook.
        with pd.ExcelWriter(temp_file, engine='xlsxwriter') as writer:
            df.to_excel(writer, index=False, sheet_name='Sheet1')

        # Return the path to the temporary file.
        temp_file_path = temp_file.name

    return temp_file_path


iface = gr.Interface(
    fn=get_translation,
    inputs=[
        gr.Dropdown(choices=supported_languages, label="Source Language"),  # Add more languages as needed
        gr.Dropdown(choices=supported_languages, label="Target Language"),
        gr.Textbox(lines=2, label="Input Text"),
        gr.Textbox(lines=1, label="Define the maximum number of output characters"),
    ],
    outputs=gr.File(label="Download Excel File"),
    title="MVP Multilingual Translation",
    description="MVP Multilingual Translation by Farhan",
)

iface.launch(share=True, debug=True)
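
The pipeline can also be exercised without the Gradio UI by calling get_translation directly. The snippet below is a minimal usage sketch, not part of the committed app.py; it assumes OPENAI_API_KEY and GEMNI_API_KEY are set in the environment, and the example text and character limit are made up.

# Hypothetical usage sketch (not part of app.py): drive the translation
# pipeline directly, assuming OPENAI_API_KEY and GEMNI_API_KEY are set.
excel_path = get_translation(
    source_language="English",
    target_language="French",
    TEXT="I will see you tomorrow at the station.",
    max_characters="60",  # the Textbox passes a string; the length check applies int(max_characters)
)
print(excel_path)  # path to a temporary .xlsx holding the original text and both translations

Note that check_conditon_openai and check_conditon_gemni recurse until the model returns a string within the limit; in practice one might cap the number of shortening retries so a persistently over-long response cannot cause unbounded recursion.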