Commit
·
b4bce9a
1
Parent(s):
61866ad
all codes
Browse files- .gitignore +2 -0
- app.py +157 -10
- data.json +192 -0
- images/2.jpg +0 -0
- images/3.jpg +0 -0
- images/hero.jpg +0 -0
.gitignore
ADDED
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
1 |
+
images/2.png
|
2 |
+
*.mp4
|
app.py
CHANGED
@@ -1,23 +1,170 @@
|
|
1 |
import pathlib
|
2 |
import uuid
|
3 |
import os
|
4 |
-
import
|
5 |
from tqdm import tqdm
|
|
|
|
|
|
|
|
|
|
|
6 |
|
|
|
|
|
7 |
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
13 |
|
14 |
-
examples = [["abcdefghijkadadasdasdasdasdasddad", "abcdefghijkadadasdasdasdasdasddad","Good morning, it's great to see you! I hope you're having a wonderful day. I just wanted to say thank you for taking the time to speak with me. Is there anything new or exciting happening in your life? I'd love to hear about it. Let's catch up soon!",
|
15 |
-
"./hero.webp"]]
|
16 |
demo = gr.Interface(fn=transcribe_video, inputs=[
|
17 |
gr.Textbox(label="D-Id API Key",placeholder="Paste your D-Id",type='password'),
|
18 |
gr.Textbox(label="Elevenlabs API Keys",placeholder="Paste Elevenlabs",type='password'),
|
19 |
gr.Textbox(lines=4, label=" Please input the text you wish to generate in order to make the photo speak.", placeholder="English Text here"),
|
20 |
-
gr.
|
21 |
-
|
|
|
22 |
|
23 |
demo.launch()
|
|
|
1 |
import pathlib
|
2 |
import uuid
|
3 |
import os
|
4 |
+
import alphaui as gr
|
5 |
from tqdm import tqdm
|
6 |
+
import requests
|
7 |
+
import urllib.request
|
8 |
+
import json
|
9 |
+
import time
|
10 |
+
output_mp3="output.mp3"
|
11 |
|
12 |
+
def upload_image(img: str, d_id_key: str) -> str:
    """Upload a local image to D-ID and return the hosted image URL.

    Args:
        img: Path of the image file to upload.
        d_id_key: D-ID API key, sent as the ``Basic`` authorization value.

    Returns:
        The ``url`` field of the JSON response.

    Raises:
        RuntimeError: If D-ID answers with a non-success status code.
    """
    url = "https://api.d-id.com/images"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    # Use a context manager so the file handle is closed even if the request
    # raises; previously the handle was left for the garbage collector.
    with open(img, "rb") as image_file:
        # The upload is always labelled "hero.jpg", whatever the local name is.
        files = {"image": ("hero.jpg", image_file, "image/jpg")}
        response = requests.post(url, files=files, headers=headers)

    # Fail with the API's own message instead of a KeyError on "url" below.
    if not response.ok:
        raise RuntimeError(
            f"D-ID image upload failed ({response.status_code}): {response.text}"
        )

    img_url = response.json()["url"]
    print(img_url)
    return img_url
|
28 |
+
|
29 |
+
def upload_audio(audio: str, d_id_key: str) -> str:
    """Upload a local audio file to D-ID and return the hosted audio URL.

    Args:
        audio: Path of the audio file; also used as the uploaded file name.
        d_id_key: D-ID API key, sent as the ``Basic`` authorization value.

    Returns:
        The ``url`` field of the JSON response.

    Raises:
        RuntimeError: If D-ID answers with a non-success status code.
    """
    url = "https://api.d-id.com/audios"
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    # Use a context manager so the file handle is closed even if the request
    # raises; previously the handle was left for the garbage collector.
    with open(audio, "rb") as audio_file:
        files = {"audio": (audio, audio_file, "audio/mpeg")}
        response = requests.post(url, files=files, headers=headers)

    # Fail with the API's own message instead of a KeyError on "url" below.
    if not response.ok:
        raise RuntimeError(
            f"D-ID audio upload failed ({response.status_code}): {response.text}"
        )

    audio_url = response.json()["url"]
    print(audio_url)
    return audio_url
|
43 |
+
|
44 |
+
def get_did_video(process_video_url, d_id_key, poll_interval=1.0, max_wait=600.0):
    """Poll a D-ID talk until its rendered video URL is available.

    Args:
        process_video_url: Talk id returned when the talk was created; it is
            appended to the ``/talks/`` endpoint.
        d_id_key: D-ID API key, sent as the ``Basic`` authorization value.
        poll_interval: Seconds to wait between polls.
        max_wait: Maximum number of seconds to keep polling before giving up.

    Returns:
        The ``result_url`` field of the talk once it appears in the response.

    Raises:
        RuntimeError: If the API returns a non-success status code, or the
            talk reports a failed state.
        TimeoutError: If no ``result_url`` appears within ``max_wait`` seconds.
    """
    url = "https://api.d-id.com/talks/" + process_video_url
    headers = {
        "accept": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    # A deadline bounds the loop: without it a failed talk or a bad key would
    # keep this function polling forever.
    deadline = time.monotonic() + max_wait

    while True:
        response = requests.get(url, headers=headers)
        print(response.text)

        if not response.ok:
            raise RuntimeError(
                f"D-ID talk lookup failed ({response.status_code}): {response.text}"
            )

        response_dict = response.json()
        if "result_url" in response_dict:
            break

        # NOTE(review): "error" and "rejected" are assumed to be the terminal
        # failure states of a talk -- confirm against the D-ID API reference.
        if response_dict.get("status") in ("error", "rejected"):
            raise RuntimeError(f"D-ID talk failed: {response.text}")

        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"D-ID talk {process_video_url} not ready after {max_wait} seconds"
            )

        # Sleep only when another poll is actually needed.
        time.sleep(poll_interval)

    result_url = response_dict["result_url"]

    print("From did_video \n\n\n")
    print(result_url)
    return result_url
|
74 |
+
|
75 |
+
def text_to_speach_api(text: str, elv_key, voice_id: str):
    """Synthesize ``text`` with ElevenLabs and write the audio to ``output.mp3``.

    Args:
        text: Text to convert to speech.
        elv_key: ElevenLabs API key, sent in the ``xi-api-key`` header.
        voice_id: ElevenLabs voice id used to build the endpoint URL.

    Raises:
        ValueError: If ``voice_id`` is empty or ``None``.
        RuntimeError: If ElevenLabs answers with a non-success status code.
    """
    # Guard first: a missing voice id would otherwise fail with an opaque
    # TypeError while building the URL.
    if not voice_id:
        raise ValueError("voice_id is required to call the text-to-speech API")

    url = "https://api.elevenlabs.io/v1/text-to-speech/" + voice_id + "/stream"
    headers = {
        "accept": "*/*",
        "xi-api-key": elv_key,
        "Content-Type": "application/json",
    }
    data = {
        "text": text,
        "voice_settings": {
            "stability": 0,
            "similarity_boost": 0,
        },
    }

    response = requests.post(url, headers=headers, json=data)

    # Returning silently here would let the caller upload a stale or missing
    # output.mp3 from an earlier run, so make the failure explicit.
    if not response.ok:
        print("Error: ", response.text)
        raise RuntimeError(
            f"ElevenLabs text-to-speech failed ({response.status_code}): "
            f"{response.text}"
        )

    # Same file name as the module-level ``output_mp3`` that the caller uploads.
    with open("output.mp3", "wb") as f:
        f.write(response.content)
|
98 |
+
|
99 |
+
def get_voice_names():
    """Return the names of all voices listed in ``data.json``.

    The file is read from the current working directory and must contain a
    top-level ``"voices"`` list whose entries each have a ``"name"`` key.

    Returns:
        A list of voice names, in file order.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which differs between platforms.
    with open("data.json", encoding="utf-8") as f:
        data = json.load(f)
    return [voice["name"] for voice in data["voices"]]
|
103 |
+
|
104 |
+
|
105 |
+
|
106 |
+
# define a function to get voice id by name
|
107 |
+
def get_voice_id(name):
    """Look up the ``voice_id`` of the voice called ``name`` in ``data.json``.

    Args:
        name: Voice name to search for (exact, case-sensitive match).

    Returns:
        The ``voice_id`` of the first matching entry, or ``None`` when no
        voice has that name.
    """
    # JSON is UTF-8 by specification; do not depend on the locale's default
    # encoding, which differs between platforms.
    with open("data.json", encoding="utf-8") as f:
        data = json.load(f)

    matches = (
        voice["voice_id"] for voice in data["voices"] if voice["name"] == name
    )
    return next(matches, None)
|
115 |
+
|
116 |
+
#D-id API
|
117 |
+
def d_id_api(image_url, d_id_key, audio_url):
    """Create a D-ID talk from an uploaded image and audio clip.

    Args:
        image_url: URL of the source image, sent as ``source_url``.
        d_id_key: D-ID API key, sent as the ``Basic`` authorization value.
        audio_url: URL of the audio used as the talk's script.

    Returns:
        The ``id`` field of the JSON response, identifying the created talk.

    Raises:
        RuntimeError: If D-ID answers with a non-success status code.
    """
    print("D-id API")
    url = "https://api.d-id.com/talks"
    payload = {
        "source_url": image_url,
        "script": {
            "type": "audio",
            "audio_url": audio_url,
        },
    }
    headers = {
        "accept": "application/json",
        "content-type": "application/json",
        "authorization": "Basic " + d_id_key,
    }

    response = requests.post(url, json=payload, headers=headers)
    print("From D-id API \n\n\n")
    print(response.text)

    # Fail with the API's own message instead of a KeyError on "id" below.
    if not response.ok:
        raise RuntimeError(
            f"D-ID talk creation failed ({response.status_code}): {response.text}"
        )

    process_video = response.json()["id"]
    print(process_video)
    return process_video
|
143 |
+
|
144 |
+
|
145 |
+
|
146 |
+
def transcribe_video(d_id_key: str, elv_key: str, full_text: str, voice_name: str, img):
    """Turn a photo and a piece of text into a talking-head video file.

    The steps are: resolve the voice, synthesize speech, upload the audio and
    the image to D-ID, create the talk, wait for the result, download it.

    Args:
        d_id_key: D-ID API key.
        elv_key: ElevenLabs API key.
        full_text: Text the photo should speak.
        voice_name: Name of a voice listed in ``data.json``.
        img: Path of the image file to animate.

    Returns:
        Path of the downloaded ``.mp4`` file.

    Raises:
        ValueError: If ``voice_name`` does not match any known voice.
    """
    print(voice_name)
    voice_id = get_voice_id(voice_name)
    # get_voice_id returns None for unknown names; stop here with a clear
    # message rather than failing later inside the text-to-speech call.
    if voice_id is None:
        raise ValueError(f"Unknown voice name: {voice_name!r}")

    text_to_speach_api(full_text, elv_key, voice_id)
    audio_url = upload_audio(output_mp3, d_id_key)
    image_url = upload_image(img, d_id_key)
    process_video_url = d_id_api(image_url, d_id_key, audio_url)
    video_url = get_did_video(process_video_url, d_id_key)

    # Use a unique name per request so one user's video is never overwritten
    # by another request while it is still being served.
    # NOTE(review): the intermediate output.mp3 is still shared between
    # requests; downloaded videos also accumulate on disk.
    file_name = f"{uuid.uuid4().hex}.mp4"
    urllib.request.urlretrieve(video_url, file_name)
    return file_name
|
157 |
+
|
158 |
+
|
159 |
+
# One row per example, in the same order as the interface inputs:
# D-ID key, ElevenLabs key, text to speak, voice name, image path.
# The key fields are left empty so users paste their own.
examples = [
    [
        "",
        "",
        "Good morning, it's great to see you! I hope you're having a wonderful day. I just wanted to say thank you for taking the time to speak with me. Is there anything new or exciting happening in your life? I'd love to hear about it. Let's catch up soon!",
        "Arnold",
        "./images/hero.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/3.jpg",
    ],
    [
        "",
        "",
        "Hello there, I'm a talking photo! I can speak any text you type here. Try it out!",
        "Domi",
        "./images/2.jpg",
    ],
]

# The five inputs map positionally onto transcribe_video's parameters.
demo = gr.Interface(
    fn=transcribe_video,
    inputs=[
        gr.Textbox(label="D-Id API Key", placeholder="Paste your D-Id", type='password'),
        gr.Textbox(label="Elevenlabs API Keys", placeholder="Paste Elevenlabs", type='password'),
        gr.Textbox(
            lines=4,
            label=" Please input the text you wish to generate in order to make the photo speak.",
            placeholder="English Text here",
        ),
        gr.Dropdown(choices=get_voice_names(), label="Select a voice"),
        gr.Image(label="photo of a Person", type="filepath"),
    ],
    outputs="video",
    title="Bring your images to life with the talking animation feature now!",
    examples=examples,
)

demo.launch()
|
data.json
ADDED
@@ -0,0 +1,192 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"voices": [{
|
3 |
+
"voice_id": "21m00Tcm4TlvDq8ikWAM",
|
4 |
+
"name": "Rachel",
|
5 |
+
"samples": null,
|
6 |
+
"category": "premade",
|
7 |
+
"fine_tuning": {
|
8 |
+
"model_id": null,
|
9 |
+
"is_allowed_to_fine_tune": false,
|
10 |
+
"fine_tuning_requested": false,
|
11 |
+
"finetuning_state": "not_started",
|
12 |
+
"verification_attempts": null,
|
13 |
+
"verification_failures": [],
|
14 |
+
"verification_attempts_count": 0,
|
15 |
+
"slice_ids": null
|
16 |
+
},
|
17 |
+
"labels": {},
|
18 |
+
"description": null,
|
19 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/21m00Tcm4TlvDq8ikWAM/6edb9076-c3e4-420c-b6ab-11d43fe341c8.mp3",
|
20 |
+
"available_for_tiers": [],
|
21 |
+
"settings": null
|
22 |
+
},
|
23 |
+
{
|
24 |
+
"voice_id": "AZnzlk1XvdvUeBnXmlld",
|
25 |
+
"name": "Domi",
|
26 |
+
"samples": null,
|
27 |
+
"category": "premade",
|
28 |
+
"fine_tuning": {
|
29 |
+
"model_id": null,
|
30 |
+
"is_allowed_to_fine_tune": false,
|
31 |
+
"fine_tuning_requested": false,
|
32 |
+
"finetuning_state": "not_started",
|
33 |
+
"verification_attempts": null,
|
34 |
+
"verification_failures": [],
|
35 |
+
"verification_attempts_count": 0,
|
36 |
+
"slice_ids": null
|
37 |
+
},
|
38 |
+
"labels": {},
|
39 |
+
"description": null,
|
40 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/AZnzlk1XvdvUeBnXmlld/69c5373f-0dc2-4efd-9232-a0140182c0a9.mp3",
|
41 |
+
"available_for_tiers": [],
|
42 |
+
"settings": null
|
43 |
+
},
|
44 |
+
{
|
45 |
+
"voice_id": "EXAVITQu4vr4xnSDxMaL",
|
46 |
+
"name": "Bella",
|
47 |
+
"samples": null,
|
48 |
+
"category": "premade",
|
49 |
+
"fine_tuning": {
|
50 |
+
"model_id": null,
|
51 |
+
"is_allowed_to_fine_tune": false,
|
52 |
+
"fine_tuning_requested": false,
|
53 |
+
"finetuning_state": "not_started",
|
54 |
+
"verification_attempts": null,
|
55 |
+
"verification_failures": [],
|
56 |
+
"verification_attempts_count": 0,
|
57 |
+
"slice_ids": null
|
58 |
+
},
|
59 |
+
"labels": {},
|
60 |
+
"description": null,
|
61 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/EXAVITQu4vr4xnSDxMaL/04365bce-98cc-4e99-9f10-56b60680cda9.mp3",
|
62 |
+
"available_for_tiers": [],
|
63 |
+
"settings": null
|
64 |
+
},
|
65 |
+
{
|
66 |
+
"voice_id": "ErXwobaYiN019PkySvjV",
|
67 |
+
"name": "Antoni",
|
68 |
+
"samples": null,
|
69 |
+
"category": "premade",
|
70 |
+
"fine_tuning": {
|
71 |
+
"model_id": null,
|
72 |
+
"is_allowed_to_fine_tune": false,
|
73 |
+
"fine_tuning_requested": false,
|
74 |
+
"finetuning_state": "not_started",
|
75 |
+
"verification_attempts": null,
|
76 |
+
"verification_failures": [],
|
77 |
+
"verification_attempts_count": 0,
|
78 |
+
"slice_ids": null
|
79 |
+
},
|
80 |
+
"labels": {},
|
81 |
+
"description": null,
|
82 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/ErXwobaYiN019PkySvjV/38d8f8f0-1122-4333-b323-0b87478d506a.mp3",
|
83 |
+
"available_for_tiers": [],
|
84 |
+
"settings": null
|
85 |
+
},
|
86 |
+
{
|
87 |
+
"voice_id": "MF3mGyEYCl7XYWbV9V6O",
|
88 |
+
"name": "Elli",
|
89 |
+
"samples": null,
|
90 |
+
"category": "premade",
|
91 |
+
"fine_tuning": {
|
92 |
+
"model_id": null,
|
93 |
+
"is_allowed_to_fine_tune": false,
|
94 |
+
"fine_tuning_requested": false,
|
95 |
+
"finetuning_state": "not_started",
|
96 |
+
"verification_attempts": null,
|
97 |
+
"verification_failures": [],
|
98 |
+
"verification_attempts_count": 0,
|
99 |
+
"slice_ids": null
|
100 |
+
},
|
101 |
+
"labels": {},
|
102 |
+
"description": null,
|
103 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/MF3mGyEYCl7XYWbV9V6O/f9fd64c3-5d62-45cd-b0dc-ad722ee3284e.mp3",
|
104 |
+
"available_for_tiers": [],
|
105 |
+
"settings": null
|
106 |
+
},
|
107 |
+
{
|
108 |
+
"voice_id": "TxGEqnHWrfWFTfGW9XjX",
|
109 |
+
"name": "Josh",
|
110 |
+
"samples": null,
|
111 |
+
"category": "premade",
|
112 |
+
"fine_tuning": {
|
113 |
+
"model_id": null,
|
114 |
+
"is_allowed_to_fine_tune": false,
|
115 |
+
"fine_tuning_requested": false,
|
116 |
+
"finetuning_state": "not_started",
|
117 |
+
"verification_attempts": null,
|
118 |
+
"verification_failures": [],
|
119 |
+
"verification_attempts_count": 0,
|
120 |
+
"slice_ids": null
|
121 |
+
},
|
122 |
+
"labels": {},
|
123 |
+
"description": null,
|
124 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/TxGEqnHWrfWFTfGW9XjX/c6c80dcd-5fe5-4a4c-a74c-b3fec4c62c67.mp3",
|
125 |
+
"available_for_tiers": [],
|
126 |
+
"settings": null
|
127 |
+
},
|
128 |
+
{
|
129 |
+
"voice_id": "VR6AewLTigWG4xSOukaG",
|
130 |
+
"name": "Arnold",
|
131 |
+
"samples": null,
|
132 |
+
"category": "premade",
|
133 |
+
"fine_tuning": {
|
134 |
+
"model_id": null,
|
135 |
+
"is_allowed_to_fine_tune": false,
|
136 |
+
"fine_tuning_requested": false,
|
137 |
+
"finetuning_state": "not_started",
|
138 |
+
"verification_attempts": null,
|
139 |
+
"verification_failures": [],
|
140 |
+
"verification_attempts_count": 0,
|
141 |
+
"slice_ids": null
|
142 |
+
},
|
143 |
+
"labels": {},
|
144 |
+
"description": null,
|
145 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/VR6AewLTigWG4xSOukaG/66e83dc2-6543-4897-9283-e028ac5ae4aa.mp3",
|
146 |
+
"available_for_tiers": [],
|
147 |
+
"settings": null
|
148 |
+
},
|
149 |
+
{
|
150 |
+
"voice_id": "pNInz6obpgDQGcFmaJgB",
|
151 |
+
"name": "Adam",
|
152 |
+
"samples": null,
|
153 |
+
"category": "premade",
|
154 |
+
"fine_tuning": {
|
155 |
+
"model_id": null,
|
156 |
+
"is_allowed_to_fine_tune": false,
|
157 |
+
"fine_tuning_requested": false,
|
158 |
+
"finetuning_state": "not_started",
|
159 |
+
"verification_attempts": null,
|
160 |
+
"verification_failures": [],
|
161 |
+
"verification_attempts_count": 0,
|
162 |
+
"slice_ids": null
|
163 |
+
},
|
164 |
+
"labels": {},
|
165 |
+
"description": null,
|
166 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/pNInz6obpgDQGcFmaJgB/e0b45450-78db-49b9-aaa4-d5358a6871bd.mp3",
|
167 |
+
"available_for_tiers": [],
|
168 |
+
"settings": null
|
169 |
+
},
|
170 |
+
{
|
171 |
+
"voice_id": "yoZ06aMxZJJ28mfd3POQ",
|
172 |
+
"name": "Sam",
|
173 |
+
"samples": null,
|
174 |
+
"category": "premade",
|
175 |
+
"fine_tuning": {
|
176 |
+
"model_id": null,
|
177 |
+
"is_allowed_to_fine_tune": false,
|
178 |
+
"fine_tuning_requested": false,
|
179 |
+
"finetuning_state": "not_started",
|
180 |
+
"verification_attempts": null,
|
181 |
+
"verification_failures": [],
|
182 |
+
"verification_attempts_count": 0,
|
183 |
+
"slice_ids": null
|
184 |
+
},
|
185 |
+
"labels": {},
|
186 |
+
"description": null,
|
187 |
+
"preview_url": "https://storage.googleapis.com/eleven-public-prod/premade/voices/yoZ06aMxZJJ28mfd3POQ/1c4d417c-ba80-4de8-874a-a1c57987ea63.mp3",
|
188 |
+
"available_for_tiers": [],
|
189 |
+
"settings": null
|
190 |
+
}
|
191 |
+
]
|
192 |
+
}
|
images/2.jpg
ADDED
![]() |
images/3.jpg
ADDED
![]() |
images/hero.jpg
ADDED
![]() |