Spaces:
Sleeping
Sleeping
ghadaAlmuaikel
committed on
Commit
•
18053d0
1
Parent(s):
8e4ef82
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,8 @@ from sentence_transformers import SentenceTransformer, util
|
|
10 |
from langdetect import detect
|
11 |
from io import BytesIO
|
12 |
import pandas as pd
|
|
|
|
|
13 |
|
14 |
# DataFrame with information about the Paintings: image URL, title, description, story
|
15 |
|
@@ -133,17 +135,37 @@ df = pd.DataFrame(data)
|
|
133 |
|
134 |
# Load models
|
135 |
|
136 |
-
#
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
139 |
|
140 |
-
#
|
141 |
-
semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
142 |
|
143 |
-
# translation models
|
144 |
-
translator_ar_to_en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en")
|
145 |
-
translator_en_to_ar = pipeline("translation_en_to_arabic", model="Helsinki-NLP/opus-mt-en-ar")
|
|
|
|
|
|
|
|
|
|
|
146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
# Function to Convert the text to Speech in Arabic using gTTS
|
148 |
def text_to_speech_arabic(story_text):
|
149 |
tts = gTTS(text=story_text, lang='ar')
|
@@ -171,7 +193,7 @@ def fetch_image_from_url(url):
|
|
171 |
print(f"Error fetching image from {url}: {str(e)}")
|
172 |
return None
|
173 |
|
174 |
-
# Process the result, where the result is shown based on the selected language
|
175 |
def process_best_match(best_match, language):
|
176 |
best_image_url = best_match["image_url"]
|
177 |
best_story = best_match["Story"]
|
@@ -185,9 +207,8 @@ def process_best_match(best_match, language):
|
|
185 |
|
186 |
# Otherwise, use English
|
187 |
info_html = f"<div style='font-size: 18px; color: white;'>{best_story}</div>"
|
188 |
-
|
189 |
-
|
190 |
-
return best_image_url, info_html, "best_story_english.mp3"
|
191 |
|
192 |
# Function to match the uploaded image against the DataFrame to retrieve the painting's image and its story as text and audio
|
193 |
def compare_images(image, language):
|
|
|
10 |
from langdetect import detect
|
11 |
from io import BytesIO
|
12 |
import pandas as pd
|
13 |
+
import numpy as np
|
14 |
+
import soundfile as sf
|
15 |
|
16 |
# DataFrame with information about the Paintings: image URL, title, description, story
|
17 |
|
|
|
135 |
|
136 |
# Load models
|
137 |
|
138 |
+
# Determine if a GPU (CUDA) is available
|
139 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
140 |
+
|
141 |
+
# TTS model
|
142 |
+
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
|
143 |
+
|
144 |
+
# Load the CLIP model and processor
|
145 |
+
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
|
146 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
147 |
|
148 |
+
# Load the semantic similarity model for description search
|
149 |
+
semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)
|
150 |
|
151 |
+
# Load the translation models for Arabic to English and English to Arabic translations
|
152 |
+
translator_ar_to_en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en", device=0 if device == "cuda" else -1)
|
153 |
+
translator_en_to_ar = pipeline("translation_en_to_arabic", model="Helsinki-NLP/opus-mt-en-ar", device=0 if device == "cuda" else -1)
|
154 |
+
|
155 |
+
# Function to convert the text to speech in English
|
156 |
+
def text_to_speech_english(story_text):
    """Synthesize English speech for *story_text* and save it to a WAV file.

    Runs the module-level `narrator` TTS pipeline, flattens the returned
    waveform, and writes it out with soundfile.

    Returns:
        str: path of the generated audio file ("story_english.wav").
    """
    tts_result = narrator(story_text)

    # The pipeline returns a dict carrying the waveform and its sampling rate;
    # squeeze drops any leading batch/channel axis so soundfile gets 1-D data.
    waveform = np.squeeze(tts_result['audio'])
    rate = tts_result['sampling_rate']

    # Persist the synthesized audio as a WAV file via soundfile.
    sf.write("story_english.wav", waveform, rate)

    return "story_english.wav"
|
168 |
+
|
169 |
# Function to Convert the text to Speech in Arabic using gTTS
|
170 |
def text_to_speech_arabic(story_text):
|
171 |
tts = gTTS(text=story_text, lang='ar')
|
|
|
193 |
print(f"Error fetching image from {url}: {str(e)}")
|
194 |
return None
|
195 |
|
196 |
+
# Process the result, where the result is shown based on the selected language
|
197 |
def process_best_match(best_match, language):
|
198 |
best_image_url = best_match["image_url"]
|
199 |
best_story = best_match["Story"]
|
|
|
207 |
|
208 |
# Otherwise, use English
|
209 |
info_html = f"<div style='font-size: 18px; color: white;'>{best_story}</div>"
|
210 |
+
audio_file = text_to_speech_english(best_story)
|
211 |
+
return best_image_url, info_html, audio_file
|
|
|
212 |
|
213 |
# Function to match the uploaded image against the DataFrame to retrieve the painting's image and its story as text and audio
|
214 |
def compare_images(image, language):
|