ghadaAlmuaikel committed on
Commit 18053d0
1 Parent(s): 8e4ef82

Update app.py

Files changed (1)
  1. app.py +32 -11
app.py CHANGED
@@ -10,6 +10,8 @@ from sentence_transformers import SentenceTransformer, util
 from langdetect import detect
 from io import BytesIO
 import pandas as pd
+import numpy as np
+import soundfile as sf
 
 # DataFrame with information about the paintings: image URL, title, description, story
 
@@ -133,17 +135,37 @@ df = pd.DataFrame(data)
 
 # Load models
 
-#Clip model and processor
-model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+# Determine if a GPU (CUDA) is available
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
+# TTS model
+narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
+
+# Load the CLIP model and processor
+model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
 processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
 
-#sentiment similarity model
-semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
+# Load the semantic similarity model for description search
+semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)
 
-# translation models
-translator_ar_to_en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en")
-translator_en_to_ar = pipeline("translation_en_to_arabic", model="Helsinki-NLP/opus-mt-en-ar")
+# Load the translation models for Arabic-to-English and English-to-Arabic translation
+translator_ar_to_en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en", device=0 if device == "cuda" else -1)
+translator_en_to_ar = pipeline("translation_en_to_arabic", model="Helsinki-NLP/opus-mt-en-ar", device=0 if device == "cuda" else -1)
+
+# Function to convert text to speech in English
+def text_to_speech_english(story_text):
+
+    audio_output = narrator(story_text)
 
+    # Extract audio and sampling rate from the output
+    audio = np.squeeze(audio_output['audio'])
+    sampling_rate = audio_output['sampling_rate']
+
+    # Save the output as a WAV file using soundfile
+    sf.write("story_english.wav", audio, sampling_rate)
+
+    return "story_english.wav"
+
 # Function to convert text to speech in Arabic using gTTS
 def text_to_speech_arabic(story_text):
     tts = gTTS(text=story_text, lang='ar')
@@ -171,7 +193,7 @@ def fetch_image_from_url(url):
         print(f"Error fetching image from {url}: {str(e)}")
         return None
 
-# Process the result based on the selected language
+# Process the result based on the selected language
 def process_best_match(best_match, language):
     best_image_url = best_match["image_url"]
     best_story = best_match["Story"]
@@ -185,9 +207,8 @@ def process_best_match(best_match, language):
 
     # Otherwise, use English
     info_html = f"<div style='font-size: 18px; color: white;'>{best_story}</div>"
-    tts = gTTS(text=best_story, lang='en')
-    tts.save("best_story_english.mp3")
-    return best_image_url, info_html, "best_story_english.mp3"
+    audio_file = text_to_speech_english(best_story)
+    return best_image_url, info_html, audio_file
 
 # Function to match the uploaded image with the DataFrame to retrieve the painting image and its story as text and audio
 def compare_images(image, language):
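
For context, the English audio path added in this commit boils down to: run the VITS text-to-speech pipeline, pull the waveform and sampling rate out of the returned dict, and write a WAV file whose path is handed back to the caller. Below is a minimal, self-contained sketch of that path (not part of the commit) using the same model name; the script scaffolding, sample sentence, and output filename are illustrative only.

import numpy as np
import soundfile as sf
import torch
from transformers import pipeline

# Pick the GPU when available, mirroring the device handling added in this commit
device = "cuda" if torch.cuda.is_available() else "cpu"

# Same text-to-speech checkpoint as in app.py
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)

def text_to_speech_english(story_text):
    # The pipeline returns a dict holding the waveform and its sampling rate
    audio_output = narrator(story_text)
    audio = np.squeeze(audio_output["audio"])
    sampling_rate = audio_output["sampling_rate"]
    # Save a WAV file; the caller returns this path alongside the image and HTML
    sf.write("story_english.wav", audio, sampling_rate)
    return "story_english.wav"

if __name__ == "__main__":
    # Illustrative sample text, not taken from the paintings DataFrame
    print(text_to_speech_english("This painting shows a quiet harbor at dusk."))

Because the helper returns a file path, the English branch of process_best_match now hands back the same kind of value as the Arabic gTTS branch, so the rest of the app can treat both languages the same way.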