# File size: 9,244 Bytes

import numpy as np
import subprocess
import cv2

# with subprocess and an extra argument 'scene' and a 'resized image saved as png' we can call the server

# yt-dlp is installed in .d4
# Download Part of Video
# yt-dlp https://www.youtube.com/watch?v=UZ9uyQI3pF0 --downloader ffmpeg --downloader-args "ffmpeg_i:-ss 997 -to 2512"
# ffmpeg -i Sandra\ Kotevska\,\ Painting\ Rose\ bush\,\ mixed\ media\,\ 2017.\ \[NMzC_036MtE\].mkv -f mp3 -ar 22050 -vn out44.wav -ac 1

# https://superuser.com/questions/583393/how-to-extract-subtitle-from-video-using-ffmpeg

def _shift(x):
    """Circularly shift `x` along its first axis by a random offset.

    The offset is drawn with `np.random.randint` from
    ``[int(.24 * n), int(.74 * n))`` where ``n = x.shape[0]``, so the result
    is always displaced by roughly a quarter to three quarters of its length.

    Args:
        x: NumPy array; 1-D, or N-D with the axis to shift first
            (e.g. ``(frames, channels)``).

    Returns:
        A new array with the same shape and contents as `x`, rolled along
        axis 0.  Arrays with fewer than two entries along axis 0 are returned
        as an unshifted copy.
    """
    n = x.shape[0]
    # randint needs integer bounds; truncate explicitly instead of relying on
    # an implicit float -> int conversion.
    low, high = int(.24 * n), int(.74 * n)
    if high <= low:
        # n is 0 or 1: the offset range is empty and randint would raise.
        return np.array(x)
    offset = np.random.randint(low, high)
    # axis=0 keeps rows intact; without it np.roll shifts the flattened array,
    # which mixes the channels of a (frames, channels) buffer.
    return np.roll(x, offset, axis=0)

#___________________________________________________________________________________________________
#   VIDEO FROM IMAGE with CAPTIONS
#
# UPLOAD to: Simaviro: Documents General WORK PACKAGES WP1 ContentRepository ANBPR_ROMANIA TTSvideos
# __________________________________________________________________________________________________

# To download SRT subtitles from YouTube
# yt-dlp --write-sub --sub-lang en --convert-subs "srt" https://www.youtube.com/watch?v=F1Ib7TAu7eg&list=PL4x2B6LSwFewdDvRnUTpBM7jkmpwouhPv&index=2

# _voice = 'en_US/vctk_low#p330'
# _voice = 'en_US/cmu-arctic_low#lnh' #en_US/vctk_low#p249'  # 'en_US/vctk_low#p282'
# _voice = ''en_US/vctk_low#p351''
# _voice = 'en_US/vctk_low#p351'  # avoid 318 it does the ghhhhhh
# _voice = 'en_US/m-ailabs_low#judy_bieber'  # Nice voice for ('Arta culinara romaneasca - Groza Irina [phIF0NxgwlQ].mkv' 'Arta culinara romaneasca - Groza Irina [phIF0NxgwlQ].en-GB.srt'),
# _voice = 'en_UK/apope_low'
# _voice = 'en_US/m-ailabs_low#mary_ann'
# _voice = 'en_US/vctk_low#p351'
# _voice = 'en_US/hifi-tts_low#92'
# voice_str = f'_{_voice.replace("/", "")}'





# image/descriptions provided by other SHIFT tool or Human curator

# https://simaviro.sharepoint.com/sites/SHIFT/Shared%20Documents/Forms/AllItems.aspx?csf=1&web=1&e=JNK8dQ&cid=363c253d%2D4d61%2D4db1%2D8ffd%2Ddedda749da2d&RootFolder=%2Fsites%2FSHIFT%2FShared%20Documents%2FGENERAL%2FWORK%20PACKAGES%2FWP1%2FContent%20Repository%2Fshift%5FSPK%5Fuse%5Fcases%5Fshare%2F02%5Fuc%5Fspk%5FLandscape2Soundscape%2FLandscape2Soundscape%5F12%5FMasterpieces&FolderCTID=0x01200058F5037C0101524B82F6F0788C02A563
# STATIC_FRAME = 'uc_spk_Landscape2Soundscape_Masterpieces_pics/01_Schick_AII840_001.jpg' #'assets/image_from_T31.jpg'




# Directory (with trailing slash) that holds the images and text files
# named in DESCRIPTIONS.
PIC_DIR = 'uc_spk_Landscape2Soundscape_Masterpieces_pics/'

# One entry per painting.  Every entry must have exactly these five fields,
# in this order, because the rendering loop unpacks them positionally:
#   0: image file name (relative to PIC_DIR)
#   1: text file name (relative to PIC_DIR)
#   2: soundscape prompt (audiocraft)
#   3: title drawn onto the image with cv2.putText
#   4: TTS voice identifier
DESCRIPTIONS = [
    # 1
    [
        '01_Schick_AII840_001.jpg',                                 # image
        '01_Schick_AII840_001.txt',                                 # text
        'Statue in shire hill on autumn beach.',                    # audiocraft
        'Gottlieb Schick - Bildnis der Heinrike Dannecker - 1802',  # cv2 puttext title
        'en_US/m-ailabs_low#mary_ann',                              # voice
    ],
    # 2
    [
        '02_Constable_AI555_001.jpg',
        '02_Constable_AI555_001.txt',
        'Meadows country farm village in sight',
        'John Constable - Dorf an dem Flusse Stour - 1804',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 3
    [
        '03_Schinkel_WS200-002.jpg',
        '03_Schinkel_WS200-002.txt',
        'Arriving at the shore on horses',
        'Karl Friedrich Schinkel - Gotische Kirche auf einem Felsen am Meer - 1815',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 4
    [
        '04_Friedrich_FV317_001.jpg',
        '04_Friedrich_FV317_001.txt',
        'Land steppes',
        'Caspar David Friedrich - Der Watzmann - 1824/1825',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 5
    [
        '05_Blechen_FV40_001.jpg',
        '05_Blechen_FV40_001.txt',
        'fjords',
        'Carl Blechen - Unwetter in der römischen Campagna - 1829',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 6
    [
        # The comma after the image name is required: without it Python joins
        # the two adjacent string literals and the entry loses a field.
        '06_Menzel_AI900_001.jpg',
        '06_Menzel_AI900_001.txt',
        'Olive trees in Seville',
        'Adolph Menzel - Bauplatz mit Weiden - 1846',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 7
    [
        '07_Courbet_AI967_001.jpg',
        '07_Courbet_AI967_001.txt',
        'Storm at the strand of waves Tsunami',
        'Gustave Courbet - Die Welle - 1869/1870',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 8
    [
        '08_Monet_AI1013_001.jpg',
        '08_Monet_AI1013_001.txt',
        'Mai flowers blossom picnic',
        'Claude Monet - Sommertag - 1874',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 9
    [
        '09_Blechen_AII823_001.jpg',
        '09_Blechen_AII823_001.txt',
        'Cascade in Africa',
        'Carl Blechen - Wasserfälle bei Tivoli - 1832',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 10
    [
        '10_Boecklin_967648_NG2-80_001_rsz.jpg',
        '10_Boecklin_967648_NG2-80_001.txt',
        'Hades ades at it sisland',
        'Arnold Böcklin - Toteninsel - 1883',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 11
    [
        '11_Liebermann_NG4-94_001.jpg',
        '11_Liebermann_NG4-94_001.txt',
        'Tavern at the waterfront',
        'Max Liebermann - Gartenlokal an der Havel. Nikolskoe - 1916',
        'en_US/m-ailabs_low#mary_ann',
    ],
    # 12
    [
        '12_Slevogt_AII1022_001.jpg',
        '12_Slevogt_AII1022_001.txt',
        'toy sailing yachts pool',
        'Max Slevogt - Segelboote auf der Alster am Abend -1905',
        'en_US/m-ailabs_low#mary_ann',
    ],
]


# File name for the intermediate video without an audio track.
SILENT_VIDEO = '_silent_video.mp4'


# SILENT CLIP


# cv2's Hershey fonts only draw ASCII; any other character comes out as '?'.
# Transliterate the German letters that occur in the titles before drawing.
_TITLE_ASCII = str.maketrans({
    'ä': 'ae', 'ö': 'oe', 'ü': 'ue',
    'Ä': 'Ae', 'Ö': 'Oe', 'Ü': 'Ue',
    'ß': 'ss',
})


def _read_image(path):
    """Load `path` with cv2.imread, raising instead of returning None."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals a missing or undecodable file with None.
        raise FileNotFoundError(f'cv2.imread could not read image: {path}')
    return img


def _half_size(img):
    """Return `img` resized to half its width and height (integer division)."""
    return cv2.resize(img, (img.shape[1] // 2, img.shape[0] // 2))


def _bottom_left_region(canvas, overlay):
    """Return the index of the `canvas` area that `overlay` covers when its
    bottom-left corner is aligned with the canvas' bottom-left corner."""
    h, w = overlay.shape[:2]
    top = canvas.shape[0] - h
    return (slice(top, top + h), slice(0, w), slice(None))


def _blend(canvas, region, overlay, canvas_weight, overlay_weight):
    """Write the weighted sum of canvas[region] and `overlay` back in place."""
    mixed = canvas_weight * canvas[region] + overlay_weight * overlay
    canvas[region] = mixed.astype(np.uint8)


# The logos do not depend on the painting: load and shrink them once.
_AUD_LOGO = _half_size(_read_image('assets/audeering_logo.jpg')[:740, :, :])
_SMB_LOGO = _half_size(_read_image('assets/SMB_logo.png'))
_SMB_EMPTY = _SMB_LOGO == 0  # zero-valued channels count as "empty" logo area

for _img_, _text_, soundscape_text, _title_, _voice_ in DESCRIPTIONS[:20]:

    # --- load the painting and make both dimensions even ---
    im = _read_image(PIC_DIR + _img_)
    h, w, _ = im.shape
    im = im[(h % 2):, (w % 2):, :]  # drop first row/column if odd
    print(im.shape, "GLOBAL IM\n\n\n\n")

    # --- title banner: white text on black, blended in 24 px from the top ---
    banner = np.zeros((94, im.shape[1], 3), dtype=np.uint8)
    cv2.putText(
        banner,
        _title_.translate(_TITLE_ASCII),
        (240, 74),                 # bottom-left corner of the text (x, y)
        cv2.FONT_HERSHEY_SIMPLEX,
        2,                         # font scale
        (255, 255, 255),           # colour
        4,                         # thickness
        2,                         # line type
    )
    banner_region = (slice(24, 24 + banner.shape[0]), slice(None), slice(None))
    _blend(im, banner_region, banner, .4, .6)
    # cv2.imshow('i', im); cv2.waitKey(); cv2.destroyAllWindows()

    # --- audEERING logo, bottom-left corner ---
    _blend(im, _bottom_left_region(im, _AUD_LOGO), _AUD_LOGO, .23, .77)

    # --- SMB logo, same corner; empty logo pixels take the image's pixels ---
    smb_region = _bottom_left_region(im, _SMB_LOGO)
    smb_logo = np.where(_SMB_EMPTY, im[smb_region], _SMB_LOGO)
    _blend(im, smb_region, smb_logo, .13, .86)

    # NOTE: a SHIFT logo overlay (bottom-right) and a soundscape audio mix
    # used to follow here; both were already disabled and are not performed.

    # --- hand the composed picture to tts.py through a fixed PNG file ---
    if not cv2.imwrite('pic_logo_emb.png', im):
        # Without a fresh PNG, tts.py would pick up the previous painting.
        print(f'could not write pic_logo_emb.png for {_img_}; skipping')
        continue

    OUT_FILE = _img_.split('/')[-1].replace('.', '__') + '.mp4'  # assets / -1
    print(f'{OUT_FILE=}\n')

    # soundscape_text is currently not forwarded to tts.py; the title is
    # already burnt into the image above.
    completed = subprocess.run(
        [
            "python",
            "tts.py",
            "--text", PIC_DIR + _text_,
            '--image', 'pic_logo_emb.png',
            '--voice', _voice_,
            '--out_file', OUT_FILE,
        ])
    if completed.returncode != 0:
        # Report the failure but keep going with the remaining paintings.
        print(f'tts.py failed for {_img_} '
              f'(exit code {completed.returncode}); continuing')

    # soundfile.write(AUDIO_TRACK, total, 22050)
    # subprocess.call(
    #     ["ffmpeg",
    #         "-y",
    #         "-i",
    #         SILENT_VIDEO,
    #         "-i",
    #         AUDIO_TRACK,
    #         #"-c:v",
    #         #"copy",
    #         "-map",
    #         "0:v:0",
    #         "-map",
    #         " 1:a:0",
    #         "-vf",
    #         "pad",
    #         OUT_FILE])