# ArtVoice_Tour / app.py
# ghadaAlmuaikel's picture
# Update app.py
# 3185216 verified
#Imports
import gradio as gr
from PIL import Image, UnidentifiedImageError
from gtts import gTTS
import requests
import re
import torch
from transformers import CLIPProcessor, CLIPModel, pipeline
from sentence_transformers import SentenceTransformer, util
from langdetect import detect
from io import BytesIO
import pandas as pd
import numpy as np
import soundfile as sf
import os
import subprocess
# Run the setup script before anything else is loaded. Per the original note it
# installs espeak-ng (presumably required by the TTS stack; confirm in setup.sh).
# check=True turns a non-zero exit status into CalledProcessError, which aborts
# startup instead of continuing with a half-configured environment.
subprocess.run(['bash', 'setup.sh'], check=True)
# Painting catalogue used to build the DataFrame: image URL, title, description and story.
# The four lists are parallel: position i in every list refers to the same
# painting, so pd.DataFrame(data) below yields one row per painting.
# "Description" is the text compared against user queries in compare_description;
# "Story" is the text that process_best_match displays and narrates.
data = {
"image_url": [
"https://s.turbifycdn.com/aah/gallerydirectart/vincent-van-gogh-estate-signed-limited-edition-giclee-starry-night-47.png", # Starry Night
"https://cdn.mos.cms.futurecdn.net/xRqbwS4odpkSQscn3jHECh-1200-80.jpg", # Mona Lisa
"https://upload.wikimedia.org/wikipedia/en/d/dd/The_Persistence_of_Memory.jpg", # The Persistence of Memory
"https://static.wixstatic.com/media/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg/v1/fill/w_568,h_718,al_c,q_85,usm_0.66_1.00_0.01,enc_auto/1071a8_cf1930f883e043e28d03d5a26a5960ef~mv2.jpg", # The Scream
"https://images.artbrokerage.com/artthumb/magritte_158194_1/625x559/Rene_Magritte_Le_Fils_De_lhomme_the_Son_of_Man_1973.jpg", # The Son of Man
"https://www.artic.edu/iiif/2/25c31d8d-21a4-9ea1-1d73-6a2eca4dda7e/full/843,/0/default.jpg", # The Bedroom
"https://images.desenio.com/zoom/17047_1.jpg", # Girl with a Pearl Earring
"https://www.hastingsindependentpress.co.uk/wp-content/uploads/2021/03/Whistlers-Mother.jpg", # Whistler’s Mother
"https://live.staticflickr.com/7173/6713746433_652c3d9d4e_c.jpg" # The Basket of Apples
],
"Title": [
"Starry Night", "Mona Lisa", "The Persistence of Memory", "The Scream",
"The Son of Man", "The Bedroom",
"Girl with a Pearl Earring", "Whistler’s Mother", "The Basket of Apples"
],
"Description": [
# Starry Night
("Starry Night by Vincent van Gogh, painted in 1889, is one of the most famous works of art in the world. "
"It depicts a swirling night sky filled with stars over a small town. The painting uses vibrant colors like blue and yellow, "
"with exaggerated swirling patterns that create a dreamlike, almost chaotic feeling."),
# Mona Lisa
("The Mona Lisa by Leonardo da Vinci, painted between 1503 and 1506, is a portrait of a woman with a subtle, enigmatic smile. "
"The use of muted colors, including soft browns, greens, and black, emphasizes the serene and mysterious nature of the subject. "
"It is one of the most studied and recognized works of art in history."),
# The Persistence of Memory
("The Persistence of Memory, created by Salvador Dalí in 1931, features melting clocks draped over a surreal landscape. "
"The painting, primarily in soft shades of brown, blue, and yellow, explores themes of time and memory. The abstract shapes "
"and dreamlike atmosphere make it one of Dalí’s most famous surrealist works."),
# The Scream
("The Scream by Edvard Munch, painted in 1893, is one of the most iconic images in modern art. "
"It depicts a figure standing on a bridge, clutching their face in agony, as a blood-red sky swirls behind them. "
"The painting uses bold reds, oranges, and blues to evoke a sense of horror and existential despair."),
# The Son of Man
("The Son of Man by René Magritte, painted in 1964, is a surrealist self-portrait of the artist. "
"It depicts a man in a bowler hat and suit, with his face obscured by a floating green apple. "
"The background features a cloudy sky and a low wall, contributing to the dreamlike atmosphere. The painting is rich in symbolism, "
"exploring themes of identity, concealment, and perception."),
# The Bedroom
("The Bedroom by Vincent van Gogh, painted in 1888, depicts the artist’s simple bedroom in Arles, France. "
"The painting uses bold, contrasting colors—yellow, red, and blue—to create a vibrant, almost childlike view of the space. "
"Van Gogh painted this scene three times, each version representing his sense of comfort and sanctuary in his personal space."),
# Girl with a Pearl Earring
("Girl with a Pearl Earring by Johannes Vermeer, painted in 1665, is often referred to as the 'Mona Lisa of the North.' "
"The painting shows a young girl looking over her shoulder, wearing a large pearl earring. The use of light and shadow, "
"combined with soft colors like blue and yellow, creates a lifelike, intimate portrait."),
# Whistler’s Mother
("Whistler’s Mother by James McNeill Whistler, painted in 1871, is a portrait of the artist’s mother seated in profile. "
"The painting uses muted tones of black, gray, and brown, reflecting the simplicity and dignity of the subject. "
"It has become an icon of motherhood and restraint."),
# The Basket of Apples
("The Basket of Apples by Paul Cézanne, painted around 1895, is a still life that challenges traditional perspectives. "
"The painting shows a table with a basket of apples, a bottle, and bread. The use of soft colors, including browns, reds, and greens, "
"along with the tilted angles, makes the objects seem to float, blurring the line between realism and abstraction.")
],
"Story": [
# Starry Night
("Vincent van Gogh painted 'Starry Night' while in a mental asylum in Saint-Rémy-de-Provence, France. "
"It was created from memory and imagination, rather than a direct view from his window. The swirling patterns "
"are thought to represent his emotional turbulence at the time. The painting is celebrated for its bold brushstrokes "
"and imaginative use of color, representing the tension between beauty and chaos in the natural world."),
# Mona Lisa
("'Mona Lisa' was painted by Leonardo da Vinci during the Renaissance period. The subject of the painting, "
"believed to be Lisa Gherardini, is famed for her mysterious smile. The painting's sfumato technique, blending "
"soft transitions between light and shadow, creates a lifelike, three-dimensional appearance. The Mona Lisa has inspired "
"countless studies and interpretations over the centuries, and its theft in 1911 only increased its mystique."),
# The Persistence of Memory
("Salvador Dalí's 'The Persistence of Memory' is a surrealist masterpiece that reflects the fluidity of time and memory. "
"The melting clocks draped over the landscape suggest the passage of time becoming meaningless. The inspiration for the painting "
"came from a melting camembert cheese. Dalí’s fascination with dream states and Freud's theories of the unconscious mind "
"are evident in this strange, dreamlike scene."),
# The Scream
("'The Scream' by Edvard Munch is a vivid expression of anxiety and existential dread. Munch was inspired to create the work "
"after a walk during which he felt the 'great scream' of nature overwhelm him. The distorted figure and fiery red sky reflect "
"Munch’s inner turmoil. The painting has become an iconic representation of human anxiety and has been widely referenced in pop culture."),
# The Son of Man
("René Magritte’s 'The Son of Man' is a quintessential example of surrealism, blending reality and fantasy. "
"The painting is a self-portrait with Magritte’s face hidden by a hovering green apple, symbolizing the tension between what is visible "
"and what is hidden. The painting has been widely interpreted as a statement on identity and the nature of perception."),
# The Bedroom
("'The Bedroom' by Vincent van Gogh is a reflection of the artist’s longing for stability and tranquility. "
"The painting was created during one of the few peaceful periods in van Gogh’s turbulent life, and the vibrant colors convey "
"his emotions at the time. The bold, contrasting colors and exaggerated perspective make the simple room appear almost alive."),
# Girl with a Pearl Earring
("'Girl with a Pearl Earring' by Johannes Vermeer is one of the most enigmatic portraits in Western art. Known for its simplicity and elegance, "
"the painting captures a fleeting moment of connection between the viewer and the subject. The girl’s mysterious gaze and the radiant light "
"on her face have captivated audiences for centuries."),
# Whistler’s Mother
("James McNeill Whistler’s 'Arrangement in Grey and Black No. 1,' more commonly known as 'Whistler’s Mother,' is a stark, dignified portrait "
"of the artist’s mother. The painting is renowned for its minimalist composition and restrained use of color. Its iconic status grew after "
"its display at the Musée d'Orsay in Paris, becoming a symbol of maternal devotion and calm."),
# The Basket of Apples
("Paul Cézanne’s 'The Basket of Apples' is a revolutionary work that defies the traditional rules of perspective. By tilting objects at different angles, "
"Cézanne challenges the viewer’s perception of space and reality. This still life is often cited as a precursor to Cubism, and its soft color palette "
"creates a serene yet dynamic composition.")
]
}
# One row per painting, with columns image_url, Title, Description and Story.
df = pd.DataFrame(data)
# Load models
# Everything runs on the GPU when CUDA is available and on the CPU otherwise.
device = "cuda" if torch.cuda.is_available() else "cpu"

# English text-to-speech pipeline used for story narration.
narrator = pipeline(
    "text-to-speech",
    model="kakao-enterprise/vits-ljs",
    device=device,
)

# CLIP model and its matching preprocessor, used for image-to-image matching.
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32").to(device)
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Sentence-embedding model used for the description (semantic) search.
semantic_model = SentenceTransformer(
    "sentence-transformers/all-MiniLM-L6-v2",
    device=device,
)

# Translation pipelines (Arabic -> English and English -> Arabic).
# Here the device is given as an index: 0 for the first GPU, -1 for the CPU.
translator_ar_to_en = pipeline(
    "translation_ar_to_en",
    model="Helsinki-NLP/opus-mt-ar-en",
    device=0 if device == "cuda" else -1,
)
translator_en_to_ar = pipeline(
    "translation_en_to_arabic",
    model="Helsinki-NLP/opus-mt-en-ar",
    device=0 if device == "cuda" else -1,
)
# Function to convert text to speech in English
def text_to_speech_english(story_text):
    """Narrate ``story_text`` in English and save the result as a WAV file.

    The text is passed to the module-level ``narrator`` pipeline; the audio
    it returns is written to ``story_english.wav`` in the current working
    directory, replacing any earlier narration.

    Args:
        story_text: The English text to narrate.

    Returns:
        The path of the written WAV file (``"story_english.wav"``).
    """
    wav_path = "story_english.wav"
    result = narrator(story_text)
    # Drop singleton dimensions from the audio array before writing it out.
    waveform = np.squeeze(result['audio'])
    sf.write(wav_path, waveform, result['sampling_rate'])
    return wav_path
# Function to convert text to speech in Arabic using gTTS
def text_to_speech_arabic(story_text):
    """Narrate ``story_text`` in Arabic with gTTS and save it as an MP3 file.

    The audio is saved to ``story_arabic.mp3`` in the current working
    directory, replacing any earlier narration.

    Args:
        story_text: The Arabic text to narrate.

    Returns:
        The path of the written MP3 file (``"story_arabic.mp3"``).
    """
    mp3_path = "story_arabic.mp3"
    gTTS(text=story_text, lang='ar').save(mp3_path)
    return mp3_path
# Function to translate the full story sentence by sentence
def translate_story_to_arabic(story_text):
    """Translate an English story to Arabic one sentence at a time.

    The story is split after sentence-ending punctuation, each sentence is
    sent to the module-level ``translator_en_to_ar`` pipeline, and the
    translated sentences are joined back together with single spaces.

    Args:
        story_text: The English story. ``None`` or blank text is treated as
            an empty story.

    Returns:
        The Arabic translation as one string; ``""`` when there is nothing
        to translate.
    """
    # Split on whitespace that follows '.', '!', '?' or the Arabic question
    # mark. The Latin '?' matters because the input is English text.
    sentences = re.split(r'(?<=[.!?؟])\s+', (story_text or "").strip())
    translated_sentences = [
        translator_en_to_ar(sentence)[0]['translation_text']
        for sentence in sentences
        if sentence  # an empty story splits into [''], so skip blank pieces
    ]
    return ' '.join(translated_sentences)
# Function to check if the image URL is valid and fetch the image
def fetch_image_from_url(url, timeout=10):
    """Download ``url`` and open the response body as a PIL image.

    This is a best-effort helper: any failure (bad URL, network error, HTTP
    error status, unreadable image data) is printed and reported as ``None``
    rather than raised, so callers can skip that image.

    Args:
        url: Address of the image to download.
        timeout: Seconds to wait for the server before giving up; passed
            straight to ``requests.get``. Without it a stalled server would
            block the request indefinitely.

    Returns:
        The opened ``PIL.Image.Image``, or ``None`` if the image could not be
        fetched or decoded.
    """
    try:
        # No stream=True: the whole body is needed anyway, and a non-streamed
        # response releases its connection even when raise_for_status() fails.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()  # Treat HTTP error statuses as failures
        return Image.open(BytesIO(response.content))
    except Exception as e:
        # Deliberately broad: one unreachable image must not abort the caller.
        print(f"Error fetching image from {url}: {str(e)}")
        return None
# Build the result shown to the user, based on the selected language
def process_best_match(best_match, language):
    """Turn a matched painting row into (image URL, story HTML, audio path).

    Args:
        best_match: Row of the paintings DataFrame; its ``"image_url"`` and
            ``"Story"`` fields are read.
        language: ``"Arabic"`` or ``"ar"`` selects the Arabic output; any
            other value produces the English output.

    Returns:
        A tuple ``(image_url, info_html, audio_file)`` where ``info_html`` is
        the story wrapped in a styled ``<div>`` and ``audio_file`` is the path
        of the narration written by the matching text-to-speech function.
    """
    image_url = best_match["image_url"]
    story = best_match["Story"]

    if language not in ("Arabic", "ar"):
        # English: show and narrate the story as stored.
        english_html = f"<div style='font-size: 18px; color: white;'>{story}</div>"
        return image_url, english_html, text_to_speech_english(story)

    # Arabic: translate first, then render right-to-left and narrate.
    arabic_story = translate_story_to_arabic(story)
    arabic_html = f"<div dir='rtl' style='font-size: 18px; color: white; font-family: Arial, sans-serif;'>{arabic_story}</div>"
    return image_url, arabic_html, text_to_speech_arabic(arabic_story)
# Function to match the uploaded image against the paintings in the DataFrame and
# return the matched painting's image together with its story as text and audio

# Cache of CLIP features for catalogue images, keyed by image URL. The catalogue
# is static, so each image is downloaded and encoded at most once per process
# instead of on every search. Failed downloads are not cached and are retried.
_DB_IMAGE_FEATURES = {}


def _clip_image_features(image):
    """Return the CLIP image-feature tensor for a PIL image (no gradients)."""
    inputs = processor(images=image, return_tensors="pt")
    inputs = {k: v.to(device) for k, v in inputs.items()}
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        return model.get_image_features(**inputs)


def _db_image_features(image_url):
    """Return cached CLIP features for a catalogue image, or None if it cannot be fetched."""
    features = _DB_IMAGE_FEATURES.get(image_url)
    if features is None:
        db_image = fetch_image_from_url(image_url)
        if db_image is None:
            return None
        features = _clip_image_features(db_image)
        _DB_IMAGE_FEATURES[image_url] = features
    return features


def compare_images(image, language):
    """Find the catalogue painting most similar to an uploaded image.

    The uploaded image and every catalogue image are encoded with CLIP and
    compared by cosine similarity; the highest-scoring painting wins.

    Args:
        image: The uploaded PIL image, or ``None`` when nothing was uploaded.
        language: Narration language, forwarded to ``process_best_match``.

    Returns:
        ``(image_url, info_html, audio_file)`` for the best match. On failure
        the tuple is ``(None, "Error: ...", None)``; no exception is raised.
    """
    if image is None:
        return None, "Error: Please upload an image first.", None
    try:
        image_features = _clip_image_features(image)
        # Cosine similarity lies in [-1, 1], so any real score beats -2.0.
        best_score = -2.0
        best_match_idx = None
        for idx, image_url in enumerate(df['image_url']):
            db_image_features = _db_image_features(image_url)
            if db_image_features is None:
                continue  # Image unavailable right now; skip it
            similarity = torch.nn.functional.cosine_similarity(image_features, db_image_features).item()
            if similarity > best_score:
                best_score = similarity
                best_match_idx = idx
        if best_match_idx is None:
            return None, "Error: No valid image match found in the database.", None
        return process_best_match(df.iloc[best_match_idx], language)
    except UnidentifiedImageError:
        return None, "Error: The uploaded file is not a valid image.", None
    except Exception as e:
        return None, f"Error: {str(e)}", None
# Function to compare user input with the descriptions in the DataFrame and return
# the best-matching painting as an image with its story in text and audio
def compare_description(input_text):
    """Find the catalogue painting whose description best matches ``input_text``.

    The language of the input is detected; Arabic input is translated to
    English first. The text is then embedded and compared by cosine
    similarity with the embedding of every ``"Description"`` in the catalogue.

    Args:
        input_text: Free-text description of a painting.

    Returns:
        ``(image_url, info_html, audio_file)`` for the best match, produced in
        the detected language. On failure, including empty input, the tuple
        is ``(None, "Error: ...", None)``; no exception is raised.
    """
    # Language detection cannot work on blank text, so reject it up front
    # with a clear message.
    if not input_text or not input_text.strip():
        return None, "Error: Please enter a description.", None
    try:
        language = detect(input_text)  # Detect the language of the input
        if language == 'ar':
            input_text = translator_ar_to_en(input_text)[0]['translation_text']
        input_embedding = semantic_model.encode(input_text, convert_to_tensor=True)
        df_embeddings = semantic_model.encode(df["Description"].tolist(), convert_to_tensor=True)
        similarities = util.pytorch_cos_sim(input_embedding, df_embeddings).squeeze()
        best_match_idx = torch.argmax(similarities).item()
        return process_best_match(df.iloc[best_match_idx], language)
    except Exception as e:
        return None, f"Error: {str(e)}", None
# Custom CSS for styling the Gradio interface; passed to gr.Blocks(css=...).
# It sets a gallery photo as the page background, makes the headings white,
# restyles the primary and example buttons, and draws a white border around
# the two matched-image outputs. The id selectors (#title, #description,
# #upload-text, #description-search-text, #image_output, #search_image_output)
# correspond to the ids / elem_id values used where the interface is built.
custom_css = """
.gradio-container {
background-image: url('https://images.squarespace-cdn.com/content/v1/587ee1eab3db2b428f68d221/1626734192415-LI75A3LVVFMJD5TVZ3HR/Gallery+2.jpg');
background-size: cover;
background-position: center;
background-repeat: no-repeat;
color: #333333;
font-family: 'Arial', sans-serif;
}
h1, #title, #description {
color: white !important;
}
#upload-text, #description-search-text {
color: white !important;
}
label, .gr-label {
color: #333333 !important;
}
button.primary {
background-color: #6A5ACD;
color: black;
border-radius: 10px;
padding: 10px;
margin: 5px;
font-size: 18px;
border: none;
transition: background-color 0.3s;
}
button.primary:hover {
background-color: #836FFF;
}
#image_output, #search_image_output {
border: 3px solid white;
border-radius: 10px;
}
/* Specifically targeting the example buttons */
.gr-examples button {
color: white !important;
background-color: transparent !important; /* Make the background blend in with the overall theme */
border: 1px solid white; /* Add a border if you want to highlight it */
}
"""
# Sample examples for the "Image Search" tab: [image URL, narration language].
image_upload_examples = [
["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "English"],
["https://pbs.twimg.com/media/DgAnD-FUcAAr3NT?format=jpg", "Arabic"]
]
# Sample examples for the "Description Search" tab: [description, language].
# NOTE(review): the gr.Examples call for this tab is wired to description_input
# only, so the second (language) value has no matching input component.
# Confirm it is meant to be ignored or drop the column.
description_search_examples = [
["Woman with a mysterious smile.", "English"],
["امرأة بابتسامة غامضة.", "Arabic"]
]
# Gradio interface with two tabs: "Image Search" and "Description Search".
# Image Search returns a painting's story from an uploaded image;
# Description Search returns it from a written description of the painting.
with gr.Blocks(css=custom_css) as demo:
    gr.Markdown("<h1 id='title'>Welcome to the Virtual Art Museum</h1>")
    gr.Markdown(
        "<p id='description'>Explore the most famous artworks. Upload an image or enter a description to learn about the story behind each piece.</p>"
    )

    with gr.Tab("Image Search"):
        gr.Markdown("<h2 id='upload-text'>Upload Art to Recognize and Hear the Story Behind It</h2>")

        # Inputs: the image to recognise and the narration language.
        image_input = gr.Image(
            type="pil",
            label="Upload an image of an art piece",
        )
        language_selector = gr.Radio(
            choices=["English", "Arabic"],
            label="Select Language for Story Narration",
            value="English",
        )
        recognize_button = gr.Button("Search")

        # Outputs: matched painting, its story as HTML, and the narration.
        image_output = gr.Image(
            label="Matched Art Piece",
            elem_id="image_output",
        )
        description_output = gr.HTML(label="Art Piece Information")
        audio_output = gr.Audio(label="Narration of the Story")

        recognize_button.click(
            compare_images,
            inputs=[image_input, language_selector],
            outputs=[image_output, description_output, audio_output],
        )
        gr.Examples(
            examples=image_upload_examples,
            inputs=[image_input, language_selector],
        )

    with gr.Tab("Description Search"):
        gr.Markdown("<h2 id='description-search-text'>Description Search</h2>")

        # Input: a free-text description; its language is detected by the handler.
        description_input = gr.Textbox(label="Enter a description (in English or Arabic)")
        search_button = gr.Button("Search")

        # Outputs mirror the Image Search tab.
        search_image_output = gr.Image(
            label="Matched Art Piece",
            elem_id="search_image_output",
        )
        search_description_output = gr.HTML(label="Art Piece Information")
        search_audio_output = gr.Audio(label="Narration of the Story")

        search_button.click(
            compare_description,
            inputs=description_input,
            outputs=[search_image_output, search_description_output, search_audio_output],
        )
        gr.Examples(
            examples=description_search_examples,
            inputs=description_input,
        )

# Start the web app.
demo.launch()