# Page header captured with the file (Hugging Face Space, status: Sleeping) - not part of the program.
#Imports | |
import gradio as gr | |
from PIL import Image, UnidentifiedImageError | |
from gtts import gTTS | |
import requests | |
import re | |
import torch | |
from transformers import CLIPProcessor, CLIPModel, pipeline | |
from sentence_transformers import SentenceTransformer, util | |
from langdetect import detect | |
from io import BytesIO | |
import pandas as pd | |
import numpy as np | |
import soundfile as sf | |
import os | |
import subprocess | |
# Run the bundled setup script (per the original note, it installs espeak-ng).
# check=True raises CalledProcessError if the script exits with a non-zero
# status, so start-up stops instead of continuing without it.
_setup_command = ['bash', 'setup.sh']
subprocess.run(_setup_command, check=True)
# Catalogue of paintings. Each record keeps everything about one painting
# together: the image URL, the title, a short visual description (used for the
# text search) and the story that is shown and narrated to the user.
_PAINTING_COLUMNS = ("image_url", "Title", "Description", "Story")

_PAINTINGS = [
    {
        "image_url": "https://s.turbifycdn.com/aah/gallerydirectart/vincent-van-gogh-estate-signed-limited-edition-giclee-starry-night-47.png",
        "Title": "Starry Night",
        "Description": (
            "Starry Night by Vincent van Gogh, painted in 1889, is one of the most famous works of art in the world. "
            "It depicts a swirling night sky filled with stars over a small town. The painting uses vibrant colors like blue and yellow, "
            "with exaggerated swirling patterns that create a dreamlike, almost chaotic feeling."
        ),
        "Story": (
            "Vincent van Gogh painted 'Starry Night' while in a mental asylum in Saint-Rémy-de-Provence, France. "
            "It was created from memory and imagination, rather than a direct view from his window. The swirling patterns "
            "are thought to represent his emotional turbulence at the time. The painting is celebrated for its bold brushstrokes "
            "and imaginative use of color, representing the tension between beauty and chaos in the natural world."
        ),
    },
    {
        "image_url": "https://cdn.mos.cms.futurecdn.net/xRqbwS4odpkSQscn3jHECh-1200-80.jpg",
        "Title": "Mona Lisa",
        "Description": (
            "The Mona Lisa by Leonardo da Vinci, painted between 1503 and 1506, is a portrait of a woman with a subtle, enigmatic smile. "
            "The use of muted colors, including soft browns, greens, and black, emphasizes the serene and mysterious nature of the subject. "
            "It is one of the most studied and recognized works of art in history."
        ),
        "Story": (
            "'Mona Lisa' was painted by Leonardo da Vinci during the Renaissance period. The subject of the painting, "
            "believed to be Lisa Gherardini, is famed for her mysterious smile. The painting's sfumato technique, blending "
            "soft transitions between light and shadow, creates a lifelike, three-dimensional appearance. The Mona Lisa has inspired "
            "countless studies and interpretations over the centuries, and its theft in 1911 only increased its mystique."
        ),
    },
    {
        "image_url": "https://upload.wikimedia.org/wikipedia/en/d/dd/The_Persistence_of_Memory.jpg",
        "Title": "The Persistence of Memory",
        "Description": (
            "The Persistence of Memory, created by Salvador Dalí in 1931, features melting clocks draped over a surreal landscape. "
            "The painting, primarily in soft shades of brown, blue, and yellow, explores themes of time and memory. The abstract shapes "
            "and dreamlike atmosphere make it one of Dalí’s most famous surrealist works."
        ),
        "Story": (
            "Salvador Dalí's 'The Persistence of Memory' is a surrealist masterpiece that reflects the fluidity of time and memory. "
            "The melting clocks draped over the landscape suggest the passage of time becoming meaningless. The inspiration for the painting "
            "came from a melting camembert cheese. Dalí’s fascination with dream states and Freud's theories of the unconscious mind "
            "are evident in this strange, dreamlike scene."
        ),
    },
    {
        "image_url": "https://static.wixstatic.com/media/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg/v1/fill/w_568,h_718,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg",
        "Title": "The Scream",
        "Description": (
            "The Scream by Edvard Munch, painted in 1893, is one of the most iconic images in modern art. "
            "It depicts a figure standing on a bridge, clutching their face in agony, as a blood-red sky swirls behind them. "
            "The painting uses bold reds, oranges, and blues to evoke a sense of horror and existential despair."
        ),
        "Story": (
            "'The Scream' by Edvard Munch is a vivid expression of anxiety and existential dread. Munch was inspired to create the work "
            "after a walk during which he felt the 'great scream' of nature overwhelm him. The distorted figure and fiery red sky reflect "
            "Munch’s inner turmoil. The painting has become an iconic representation of human anxiety and has been widely referenced in pop culture."
        ),
    },
    {
        "image_url": "https://images.artbrokerage.com/artthumb/magritte_158194_1/625x559/Rene_Magritte_Le_Fils_De_lhomme_the_Son_of_Man_1973.jpg",
        "Title": "The Son of Man",
        "Description": (
            "The Son of Man by René Magritte, painted in 1964, is a surrealist self-portrait of the artist. "
            "It depicts a man in a bowler hat and suit, with his face obscured by a floating green apple. "
            "The background features a cloudy sky and a low wall, contributing to the dreamlike atmosphere. The painting is rich in symbolism, "
            "exploring themes of identity, concealment, and perception."
        ),
        "Story": (
            "René Magritte’s 'The Son of Man' is a quintessential example of surrealism, blending reality and fantasy. "
            "The painting is a self-portrait with Magritte’s face hidden by a hovering green apple, symbolizing the tension between what is visible "
            "and what is hidden. The painting has been widely interpreted as a statement on identity and the nature of perception."
        ),
    },
    {
        "image_url": "https://www.artic.edu/iiif/2/25c31d8d-21a4-9ea1-1d73-6a2eca4dda7e/full/843,/0/default.jpg",
        "Title": "The Bedroom",
        "Description": (
            "The Bedroom by Vincent van Gogh, painted in 1888, depicts the artist’s simple bedroom in Arles, France. "
            "The painting uses bold, contrasting colors—yellow, red, and blue—to create a vibrant, almost childlike view of the space. "
            "Van Gogh painted this scene three times, each version representing his sense of comfort and sanctuary in his personal space."
        ),
        "Story": (
            "'The Bedroom' by Vincent van Gogh is a reflection of the artist’s longing for stability and tranquility. "
            "The painting was created during one of the few peaceful periods in van Gogh’s turbulent life, and the vibrant colors convey "
            "his emotions at the time. The bold, contrasting colors and exaggerated perspective make the simple room appear almost alive."
        ),
    },
    {
        "image_url": "https://images.desenio.com/zoom/17047_1.jpg",
        "Title": "Girl with a Pearl Earring",
        "Description": (
            "Girl with a Pearl Earring by Johannes Vermeer, painted in 1665, is often referred to as the 'Mona Lisa of the North.' "
            "The painting shows a young girl looking over her shoulder, wearing a large pearl earring. The use of light and shadow, "
            "combined with soft colors like blue and yellow, creates a lifelike, intimate portrait."
        ),
        "Story": (
            "'Girl with a Pearl Earring' by Johannes Vermeer is one of the most enigmatic portraits in Western art. Known for its simplicity and elegance, "
            "the painting captures a fleeting moment of connection between the viewer and the subject. The girl’s mysterious gaze and the radiant light "
            "on her face have captivated audiences for centuries."
        ),
    },
    {
        "image_url": "https://www.hastingsindependentpress.co.uk/wp-content/uploads/2021/03/Whistlers-Mother.jpg",
        "Title": "Whistler’s Mother",
        "Description": (
            "Whistler’s Mother by James McNeill Whistler, painted in 1871, is a portrait of the artist’s mother seated in profile. "
            "The painting uses muted tones of black, gray, and brown, reflecting the simplicity and dignity of the subject. "
            "It has become an icon of motherhood and restraint."
        ),
        "Story": (
            "James McNeill Whistler’s 'Arrangement in Grey and Black No. 1,' more commonly known as 'Whistler’s Mother,' is a stark, dignified portrait "
            "of the artist’s mother. The painting is renowned for its minimalist composition and restrained use of color. Its iconic status grew after "
            "its display at the Musée d'Orsay in Paris, becoming a symbol of maternal devotion and calm."
        ),
    },
    {
        "image_url": "https://live.staticflickr.com/7173/6713746433_652c3d9d4e_c.jpg",
        "Title": "The Basket of Apples",
        "Description": (
            "The Basket of Apples by Paul Cézanne, painted around 1895, is a still life that challenges traditional perspectives. "
            "The painting shows a table with a basket of apples, a bottle, and bread. The use of soft colors, including browns, reds, and greens, "
            "along with the tilted angles, makes the objects seem to float, blurring the line between realism and abstraction."
        ),
        "Story": (
            "Paul Cézanne’s 'The Basket of Apples' is a revolutionary work that defies the traditional rules of perspective. By tilting objects at different angles, "
            "Cézanne challenges the viewer’s perception of space and reality. This still life is often cited as a precursor to Cubism, and its soft color palette "
            "creates a serene yet dynamic composition."
        ),
    },
]

# Transpose the records into the column-oriented mapping (column name -> list
# of values, one per painting) and build the DataFrame the searches run on.
data = {
    column: [painting[column] for painting in _PAINTINGS]
    for column in _PAINTING_COLUMNS
}
df = pd.DataFrame(data)
# ---- Model loading (runs once, at import time) ----
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The translation pipelines below receive an integer device index instead of
# the device string: 0 when running on CUDA, -1 otherwise.
_translation_device = 0 if device == "cuda" else -1

# Text-to-speech pipeline used for the English narration
narrator = pipeline("text-to-speech", model="kakao-enterprise/vits-ljs", device=device)

# CLIP model and its processor, used to embed images for the image search
_CLIP_CHECKPOINT = "openai/clip-vit-base-patch32"
model = CLIPModel.from_pretrained(_CLIP_CHECKPOINT).to(device)
processor = CLIPProcessor.from_pretrained(_CLIP_CHECKPOINT)

# Sentence-embedding model used to compare a typed description with the catalogue
semantic_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2', device=device)

# Translation pipelines: Arabic -> English for queries, English -> Arabic for stories
translator_ar_to_en = pipeline(
    "translation_ar_to_en",
    model="Helsinki-NLP/opus-mt-ar-en",
    device=_translation_device,
)
translator_en_to_ar = pipeline(
    "translation_en_to_arabic",
    model="Helsinki-NLP/opus-mt-en-ar",
    device=_translation_device,
)
def text_to_speech_english(story_text):
    """Narrate *story_text* in English and save the audio as a WAV file.

    The text is synthesized with the module-level ``narrator`` pipeline and
    written with ``soundfile``.

    Returns:
        The path of the written file, always ``"story_english.wav"``.
    """
    # NOTE(review): the output path is a fixed name in the working directory,
    # so every call overwrites the previous narration.
    wav_path = "story_english.wav"
    synthesized = narrator(story_text)
    # Drop length-1 axes from the returned audio array before writing it out.
    waveform = np.squeeze(synthesized['audio'])
    sf.write(wav_path, waveform, synthesized['sampling_rate'])
    return wav_path
def text_to_speech_arabic(story_text):
    """Narrate *story_text* in Arabic with gTTS and save the audio as an MP3.

    Returns:
        The path of the written file, always ``"story_arabic.mp3"``.
    """
    # NOTE(review): the output path is a fixed name in the working directory,
    # so every call overwrites the previous narration.
    mp3_path = "story_arabic.mp3"
    gTTS(text=story_text, lang='ar').save(mp3_path)
    return mp3_path
def translate_story_to_arabic(story_text):
    """Translate an English story into Arabic, one sentence at a time.

    The story is split after sentence-ending punctuation and each sentence is
    sent to the ``translator_en_to_ar`` pipeline separately (presumably to
    keep every input short for the translation model); the translated
    sentences are then joined with single spaces.

    Args:
        story_text: The English text to translate.

    Returns:
        The Arabic translation as one string, or ``""`` when *story_text* is
        empty or contains only whitespace.
    """
    if not story_text:
        return ""
    # Split after '.', '!' and '?'. The Latin '?' is included because the
    # input is English; the Arabic question mark is kept for compatibility.
    # Empty chunks are dropped so the translator is never called on "".
    sentences = [
        sentence
        for sentence in re.split(r'(?<=[.!?؟])\s+', story_text.strip())
        if sentence
    ]
    return ' '.join(
        translator_en_to_ar(sentence)[0]['translation_text']
        for sentence in sentences
    )
def fetch_image_from_url(url, timeout=10):
    """Download *url* and return its content as a PIL image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled server would block the calling request
            indefinitely.

    Returns:
        The decoded image, or ``None`` when the download fails, the server
        answers with an error status, or the payload is not a readable image.
        The error is printed; it is never raised to the caller.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Treat HTTP error statuses as failures
        image = Image.open(BytesIO(response.content))
        # Image.open only reads the header; decode now so a truncated or
        # corrupt file is reported here rather than later in the caller.
        image.load()
        return image
    except Exception as e:
        print(f"Error fetching image from {url}: {str(e)}")
        return None
def process_best_match(best_match, language):
    """Build the outputs shown for the matched painting in the chosen language.

    Args:
        best_match: Catalogue row of the matched painting; its ``"image_url"``
            and ``"Story"`` entries are read.
        language: ``"Arabic"`` or ``"ar"`` selects the Arabic output; any
            other value selects English.

    Returns:
        A tuple ``(image_url, info_html, audio_file)``: the painting's image
        URL, the story wrapped in a styled ``<div>``, and the path of the
        narration audio file.
    """
    image_url = best_match["image_url"]
    story = best_match["Story"]

    if language in ("Arabic", "ar"):
        # Arabic: translate first, render right-to-left, narrate the translation
        story = translate_story_to_arabic(story)
        info_html = f"<div dir='rtl' style='font-size: 18px; color: white; font-family: Arial, sans-serif;'>{story}</div>"
        narration_path = text_to_speech_arabic(story)
    else:
        # Any other language value falls back to the original English story
        info_html = f"<div style='font-size: 18px; color: white;'>{story}</div>"
        narration_path = text_to_speech_english(story)

    return image_url, info_html, narration_path
# CLIP embeddings of the catalogue images, keyed by image URL. Filled lazily
# so each reference painting is downloaded and embedded only once per process
# instead of on every search.
_DB_IMAGE_FEATURES = {}


def _clip_image_features(image):
    """Return the CLIP image embedding of one PIL image."""
    inputs = processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Inference only: no gradients are needed, so do not track them.
    with torch.no_grad():
        return model.get_image_features(**inputs)


def _reference_image_features(image_url):
    """Return the cached embedding of a catalogue image, or None if it cannot be fetched."""
    if image_url not in _DB_IMAGE_FEATURES:
        db_image = fetch_image_from_url(image_url)
        if db_image is None:
            # Failures are not cached, so a later request retries the download.
            return None
        _DB_IMAGE_FEATURES[image_url] = _clip_image_features(db_image)
    return _DB_IMAGE_FEATURES[image_url]


def compare_images(image, language):
    """Find the catalogue painting most similar to an uploaded image.

    The uploaded image and every catalogue image are embedded with CLIP and
    compared by cosine similarity; the painting with the highest score wins.

    Args:
        image: The uploaded image (PIL), or ``None`` when nothing was uploaded.
        language: Narration language, forwarded to ``process_best_match``.

    Returns:
        ``(image_url, info_html, audio_file)`` for the best match. On failure
        the tuple is ``(None, "Error: ...", None)``; no exception is raised.
    """
    if image is None:
        return None, "Error: Please upload an image before searching.", None
    try:
        image_features = _clip_image_features(image)
        best_score = -2.0  # Below the minimum cosine similarity (-1)
        best_match_idx = None
        for idx, image_url in enumerate(df['image_url']):
            db_image_features = _reference_image_features(image_url)
            if db_image_features is None:
                continue  # Skip catalogue images that could not be loaded
            similarity = torch.nn.functional.cosine_similarity(image_features, db_image_features).item()
            if similarity > best_score:
                best_score = similarity
                best_match_idx = idx
        if best_match_idx is None:
            return None, "Error: No valid image match found in the database.", None
        best_match = df.iloc[best_match_idx]
        return process_best_match(best_match, language)
    except UnidentifiedImageError:
        return None, "Error: The uploaded file is not a valid image.", None
    except Exception as e:
        return None, f"Error: {str(e)}", None
# Sentence embeddings of the catalogue descriptions. Computed on first use and
# then reused, because the catalogue does not change between requests.
_DESCRIPTION_EMBEDDINGS = None


def _description_embeddings():
    """Return the (lazily computed) embeddings of all catalogue descriptions."""
    global _DESCRIPTION_EMBEDDINGS
    if _DESCRIPTION_EMBEDDINGS is None:
        _DESCRIPTION_EMBEDDINGS = semantic_model.encode(df["Description"].tolist(), convert_to_tensor=True)
    return _DESCRIPTION_EMBEDDINGS


def compare_description(input_text):
    """Find the catalogue painting whose description best matches *input_text*.

    The language of the text is detected; Arabic input is translated to
    English first. The text is then embedded and compared with the catalogue
    descriptions by cosine similarity. The detected language code is forwarded
    to ``process_best_match``, so an Arabic query gets an Arabic answer.

    Args:
        input_text: The user's description of a painting.

    Returns:
        ``(image_url, info_html, audio_file)`` for the best match. On failure,
        or when the text is empty, the tuple is ``(None, "Error: ...", None)``;
        no exception is raised.
    """
    if not input_text or not input_text.strip():
        return None, "Error: Please enter a description before searching.", None
    try:
        language = detect(input_text)  # Language code such as 'en' or 'ar'
        if language == 'ar':
            input_text = translator_ar_to_en(input_text)[0]['translation_text']
        input_embedding = semantic_model.encode(input_text, convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(input_embedding, _description_embeddings()).squeeze()
        best_match_idx = torch.argmax(similarities).item()
        best_match = df.iloc[best_match_idx]
        return process_best_match(best_match, language)
    except Exception as e:
        return None, f"Error: {str(e)}", None
# Custom CSS for styling the Gradio interface; it is passed to gr.Blocks(css=...).
# It sets a gallery photo as the page background, makes the headings white,
# styles primary buttons, frames the two result images (#image_output and
# #search_image_output) and restyles the example buttons.
custom_css = """
.gradio-container {
background-image: url('https://images.squarespace-cdn.com/content/v1/587ee1eab3db2b428f68d221/1626734192415-LI75A3LVVFMJD5TVZ3HR/Gallery+2.jpg');
background-size: cover;
background-position: center;
background-repeat: no-repeat;
color: #333333;
font-family: 'Arial', sans-serif;
}
h1, #title, #description {
color: white !important;
}
#upload-text, #description-search-text {
color: white !important;
}
label, .gr-label {
color: #333333 !important;
}
button.primary {
background-color: #6A5ACD;
color: black;
border-radius: 10px;
padding: 10px;
margin: 5px;
font-size: 18px;
border: none;
transition: background-color 0.3s;
}
button.primary:hover {
background-color: #836FFF;
}
#image_output, #search_image_output {
border: 3px solid white;
border-radius: 10px;
}
/* Specifically targeting the example buttons */
.gr-examples button {
color: white !important;
background-color: transparent !important; /* Make the background blend in with the overall theme */
border: 1px solid white; /* Add a border if you want to highlight it */
}
"""
# Examples for the "Image Search" tab. Each row holds one value per input
# component of that tab: the image (given as a URL) and the narration language.
image_upload_examples = [
    ["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "English"],
    ["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "Arabic"],
]
# Examples for the "Description Search" tab. That tab has a single input (the
# description text box) and detects the language from the text itself, so each
# row holds exactly one value.
description_search_examples = [
    ["Woman with a mysterious smile."],
    ["امرأة بابتسامة غامضة."],
]
# Gradio interface with two tabs:
#   * "Image Search": upload a picture of a painting to get its story.
#   * "Description Search": describe a painting in words to get its story.
# Both tabs show the matched painting, its story as HTML and an audio narration.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("<h1 id='title'>Welcome to the Virtual Art Museum</h1>")
    gr.Markdown("<p id='description'>Explore the most famous artworks. Upload an image or enter a description to learn about the story behind each piece.</p>")

    with gr.Tab("Image Search"):
        gr.Markdown("<h2 id='upload-text'>Upload Art to Recognize and Hear the Story Behind It</h2>")
        # Inputs
        image_input = gr.Image(type="pil", label="Upload an image of an art piece")
        language_selector = gr.Radio(
            choices=["English", "Arabic"],
            label="Select Language for Story Narration",
            value="English",
        )
        recognize_button = gr.Button("Search")
        # Outputs
        image_output = gr.Image(label="Matched Art Piece", elem_id="image_output")
        description_output = gr.HTML(label="Art Piece Information")
        audio_output = gr.Audio(label="Narration of the Story")
        # Wiring
        recognize_button.click(
            fn=compare_images,
            inputs=[image_input, language_selector],
            outputs=[image_output, description_output, audio_output],
        )
        gr.Examples(
            examples=image_upload_examples,
            inputs=[image_input, language_selector],
        )

    with gr.Tab("Description Search"):
        gr.Markdown("<h2 id='description-search-text'>Description Search</h2>")
        # Input
        description_input = gr.Textbox(label="Enter a description (in English or Arabic)")
        search_button = gr.Button("Search")
        # Outputs
        search_image_output = gr.Image(label="Matched Art Piece", elem_id="search_image_output")
        search_description_output = gr.HTML(label="Art Piece Information")
        search_audio_output = gr.Audio(label="Narration of the Story")
        # Wiring
        search_button.click(
            fn=compare_description,
            inputs=description_input,
            outputs=[search_image_output, search_description_output, search_audio_output],
        )
        gr.Examples(
            examples=description_search_examples,
            inputs=description_input,
        )

demo.launch()