File size: 2,395 Bytes
c07926e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import easyocr
import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import ViTFeatureExtractor, ViTForImageClassification, pipeline

from aesthetic_score import AestheticScorer

# --- Model setup (executed once, when the module is loaded) ---

# Image-classification pipeline built from a ViT checkpoint. Its
# highest-scoring label is what analyze_single_thumbnail reports under
# "Emotion Detected".
_VIT_CHECKPOINT = "nateraw/vit-base-beauty"
feature_extractor = ViTFeatureExtractor.from_pretrained(_VIT_CHECKPOINT)
model = ViTForImageClassification.from_pretrained(_VIT_CHECKPOINT)
emotion_detector = pipeline(
    task="image-classification",
    model=model,
    feature_extractor=feature_extractor,
)

# EasyOCR reader configured for English; used for text detection.
reader = easyocr.Reader(['en'])

# Aesthetic scoring model (placeholder).
aesthetic_scorer = AestheticScorer()

def analyze_single_thumbnail(image):
    """Run OCR, image classification and aesthetic scoring on one thumbnail.

    Args:
        image: A PIL image (the Gradio inputs in this file use ``type="pil"``).

    Returns:
        A dict with three keys:
        ``"Detected Text"`` - the OCR strings joined by single spaces, or
        ``"No text found"`` when OCR returns nothing;
        ``"Emotion Detected"`` - the highest-scoring label produced by
        ``emotion_detector``, or ``"Unknown"`` when it returns nothing;
        ``"Aesthetic Score"`` - whatever ``aesthetic_scorer.score`` returns.

    Raises:
        ValueError: If ``image`` is ``None``.
    """
    if image is None:
        raise ValueError("No image provided for analysis.")

    # EasyOCR's readtext accepts a file path, URL, bytes or an OpenCV-style
    # numpy array, not a generic PIL image, so hand it a contiguous BGR
    # pixel array built from the PIL image.
    rgb_pixels = np.asarray(image.convert("RGB"))
    bgr_pixels = np.ascontiguousarray(rgb_pixels[:, :, ::-1])
    ocr_result = reader.readtext(bgr_pixels)
    # The recognized string sits at index 1 of every OCR entry.
    if ocr_result:
        text_detected = " ".join(item[1] for item in ocr_result)
    else:
        text_detected = "No text found"

    # The classification pipeline takes the PIL image directly; keep only the
    # label with the highest score.
    emotions = emotion_detector(image)
    if emotions:
        main_emotion = max(emotions, key=lambda x: x['score'])['label']
    else:
        main_emotion = "Unknown"

    # Aesthetic scoring on the original (unconverted) image.
    aesthetic_score = aesthetic_scorer.score(image)

    return {
        "Detected Text": text_detected,
        "Emotion Detected": main_emotion,
        "Aesthetic Score": aesthetic_score,
    }

def compare_thumbnails(image1, image2):
    """Analyze two thumbnails and report which has the higher aesthetic score.

    Args:
        image1: First thumbnail, as delivered by the first Gradio image input.
        image2: Second thumbnail, as delivered by the second Gradio image input.

    Returns:
        A 3-tuple ``(result1, result2, better_thumbnail)`` where the first two
        items are the dicts returned by ``analyze_single_thumbnail`` and the
        third is a human-readable verdict string.

    Raises:
        gr.Error: If either thumbnail is missing, so the UI shows a clear
            message instead of an internal failure.
    """
    # Gradio passes None for an image slot the user left empty; stop early
    # with a user-facing message before any model is invoked.
    if image1 is None or image2 is None:
        raise gr.Error("Please upload both thumbnails before comparing.")

    result1 = analyze_single_thumbnail(image1)
    result2 = analyze_single_thumbnail(image2)

    # The verdict is based solely on the aesthetic score; only exactly equal
    # scores produce the "similar appeal" message.
    score1 = result1["Aesthetic Score"]
    score2 = result2["Aesthetic Score"]
    if score1 > score2:
        better_thumbnail = "Thumbnail 1 is likely better."
    elif score2 > score1:
        better_thumbnail = "Thumbnail 2 is likely better."
    else:
        better_thumbnail = "Both thumbnails have similar appeal."

    return result1, result2, better_thumbnail

# Gradio UI: two PIL image uploads go in; two JSON reports and a one-line
# verdict come out.
_thumbnail_inputs = [
    gr.Image(type="pil"),
    gr.Image(type="pil"),
]
_report_outputs = [
    gr.JSON(label="Thumbnail 1 Analysis"),
    gr.JSON(label="Thumbnail 2 Analysis"),
    gr.Textbox(label="Comparison Result"),
]

iface = gr.Interface(
    fn=compare_thumbnails,
    inputs=_thumbnail_inputs,
    outputs=_report_outputs,
    title="YouTube Thumbnail Comparator",
    description="Upload two thumbnails to compare their effectiveness based on detected text, emotions, and aesthetic score.",
)

# Start the web server for the interface.
iface.launch()