Spaces:
Sleeping
Sleeping
ghadaAlmuaikel
committed on
Commit
•
18053d0
1
Parent(s):
8e4ef82
Update app.py
Browse files
app.py
CHANGED
@@ -10,6 +10,8 @@ from sentence_transformers import SentenceTransformer, util
|
|
10 |
from langdetect import detect
|
11 |
from io import BytesIO
|
12 |
import pandas as pd
|
|
|
|
|
13 |
|
14 |
# DataFrame with information about the Paintings: image URL, title, description, story
|
15 |
|
@@ -133,17 +135,37 @@ df = pd.DataFrame(data)
|
|
133 |
|
134 |
# Load models
|
135 |
|
136 |
-
#
|
137 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
138 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
139 |
|
140 |
-
#
|
141 |
-
semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')
|
142 |
|
143 |
-
# translation models
|
144 |
-
translator_ar_to_en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en")
|
145 |
-
translator_en_to_ar = pipeline("translation_en_to_arabic", model="Helsinki-NLP/opus-mt-en-ar")
|
|
|
|
|
|
|
|
|
|
|
146 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
147 |
# Function to Convert the text to Speech in Arabic using gTTS
|
148 |
def text_to_speech_arabic(story_text):
|
149 |
tts = gTTS(text=story_text, lang='ar')
|
@@ -171,7 +193,7 @@ def fetch_image_from_url(url):
|
|
171 |
print(f"Error fetching image from {url}: {str(e)}")
|
172 |
return None
|
173 |
|
174 |
-
# Process the result, where the result is shown based on the selected language
|
175 |
def process_best_match(best_match, language):
|
176 |
best_image_url = best_match["image_url"]
|
177 |
best_story = best_match["Story"]
|
@@ -185,9 +207,8 @@ def process_best_match(best_match, language):
|
|
185 |
|
186 |
# Otherwise, use English
|
187 |
info_html = f"<div style='font-size: 18px; color: white;'>{best_story}</div>"
|
188 |
-
|
189 |
-
|
190 |
-
return best_image_url, info_html, "best_story_english.mp3"
|
191 |
|
192 |
# Function to match the uploaded image against the DataFrame to retrieve the painting's image and its story as text and audio
|
193 |
def compare_images(image, language):
|
|
|
10 |
from langdetect import detect
|
11 |
from io import BytesIO
|
12 |
import pandas as pd
|
13 |
+
import numpy as np
|
14 |
+
import soundfile as sf
|
15 |
|
16 |
# DataFrame with information about the Paintings: image URL, title, description, story
|
17 |
|
|
|
135 |
|
136 |
# Load models
|
137 |
|
138 |
+
# Determine if a GPU (CUDA) is available
|
139 |
+
device = "cuda" if torch.cuda.is_available() else "cpu"
|
140 |
+
|
141 |
+
# TTS model
|
142 |
+
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)
|
143 |
+
|
144 |
+
# Load the CLIP model and processor
|
145 |
+
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
|
146 |
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
147 |
|
148 |
+
# Load the semantic similarity model for description search
|
149 |
+
semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)
|
150 |
|
151 |
+
# Load the translation models for Arabic to English and English to Arabic translations
|
152 |
+
translator_ar_to_en = pipeline("translation_ar_to_en", model="Helsinki-NLP/opus-mt-ar-en", device=0 if device == "cuda" else -1)
|
153 |
+
translator_en_to_ar = pipeline("translation_en_to_arabic", model="Helsinki-NLP/opus-mt-en-ar", device=0 if device == "cuda" else -1)
|
154 |
+
|
155 |
+
# Function to convert the text to speech in English
|
156 |
+
def text_to_speech_english(story_text):
    """Synthesize English speech for *story_text* and save it to a WAV file.

    Runs the module-level `narrator` TTS pipeline, flattens the returned
    waveform, and writes it out with soundfile.

    Returns:
        str: path of the generated audio file ("story_english.wav").
    """
    tts_result = narrator(story_text)

    # The pipeline returns a dict carrying the waveform and its sampling rate;
    # squeeze drops any leading batch/channel axis so soundfile gets 1-D data.
    waveform = np.squeeze(tts_result['audio'])
    rate = tts_result['sampling_rate']

    # Persist the synthesized audio as a WAV file via soundfile.
    sf.write("story_english.wav", waveform, rate)

    return "story_english.wav"
|
168 |
+
|
169 |
# Function to Convert the text to Speech in Arabic using gTTS
|
170 |
def text_to_speech_arabic(story_text):
|
171 |
tts = gTTS(text=story_text, lang='ar')
|
|
|
193 |
print(f"Error fetching image from {url}: {str(e)}")
|
194 |
return None
|
195 |
|
196 |
+
# Process the result, where the result is shown based on the selected language
|
197 |
def process_best_match(best_match, language):
|
198 |
best_image_url = best_match["image_url"]
|
199 |
best_story = best_match["Story"]
|
|
|
207 |
|
208 |
# Otherwise, use English
|
209 |
info_html = f"<div style='font-size: 18px; color: white;'>{best_story}</div>"
|
210 |
+
audio_file = text_to_speech_english(best_story)
|
211 |
+
return best_image_url, info_html, audio_file
|
|
|
212 |
|
213 |
# Function to match the uploaded image against the DataFrame to retrieve the painting's image and its story as text and audio
|
214 |
def compare_images(image, language):
|