Spaces:
Configuration error
Configuration error
update minio bucket
Browse files
- config/settings.py +7 -0
- config/urls.py +1 -2
- db.sqlite3 +0 -0
- requirements.txt +1 -0
- texttovoice/migrations/0001_initial.py +3 -3
- texttovoice/minio_utils.py +12 -0
- texttovoice/models.py +2 -2
- texttovoice/serializers.py +33 -0
- texttovoice/urls.py +8 -0
- texttovoice/views.py +127 -82
config/settings.py
CHANGED
@@ -181,6 +181,13 @@ STATIC_URL = 'static/'
|
|
181 |
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
|
182 |
MEDIA_URL = '/media/'
|
183 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
184 |
# Default primary key field type
|
185 |
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
|
186 |
|
|
|
181 |
MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
|
182 |
MEDIA_URL = '/media/'
|
183 |
|
184 |
+
# settings.py
|
185 |
+
|
186 |
+
# MinIO object-storage connection settings.
# Values are read from the environment so that credentials do not have to live
# in source control; each fallback reproduces the previously hard-coded value,
# so deployments that set nothing behave exactly as before.
# NOTE(review): the fallback credentials are already committed -- rotate them
# and drop the defaults once every deployment sets these variables.
MINIO_ENDPOINT = os.environ.get('MINIO_ENDPOINT', '64.176.199.162:9000')
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY', 'voice-clone')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY', 'voice-clone')
# Set MINIO_SECURE to 1/true/yes/on when the endpoint is served over HTTPS.
MINIO_SECURE = os.environ.get('MINIO_SECURE', 'false').strip().lower() in {'1', 'true', 'yes', 'on'}
|
190 |
+
|
191 |
# Default primary key field type
|
192 |
# https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
|
193 |
|
config/urls.py
CHANGED
@@ -22,12 +22,11 @@ schema_view = get_schema_view(
|
|
22 |
)
|
23 |
|
24 |
|
25 |
-
from texttovoice.views import TextToSpeechCreateView
|
26 |
|
27 |
urlpatterns = [
|
28 |
path('admin/', admin.site.urls),
|
29 |
path('auth/', include('accounts.urls')),
|
30 |
-
path('
|
31 |
path('', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
|
32 |
|
33 |
]
|
|
|
22 |
)
|
23 |
|
24 |
|
|
|
25 |
|
26 |
urlpatterns = [
|
27 |
path('admin/', admin.site.urls),
|
28 |
path('auth/', include('accounts.urls')),
|
29 |
+
path('speech/', include('texttovoice.urls')),
|
30 |
path('', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
|
31 |
|
32 |
]
|
db.sqlite3
CHANGED
Binary files a/db.sqlite3 and b/db.sqlite3 differ
|
|
requirements.txt
CHANGED
@@ -13,6 +13,7 @@ Django
|
|
13 |
djangorestframework
|
14 |
drf-yasg
|
15 |
django-cors-headers
|
|
|
16 |
|
17 |
# fastapi==0.70.0
|
18 |
# uvicorn==0.15.0
|
|
|
13 |
djangorestframework
|
14 |
drf-yasg
|
15 |
django-cors-headers
|
16 |
+
minio
|
17 |
|
18 |
# fastapi==0.70.0
|
19 |
# uvicorn==0.15.0
|
texttovoice/migrations/0001_initial.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# Generated by Django 5.0.1 on 2024-
|
2 |
|
3 |
import django.db.models.deletion
|
4 |
from django.conf import settings
|
@@ -20,8 +20,8 @@ class Migration(migrations.Migration):
|
|
20 |
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
21 |
('created_date', models.DateTimeField(auto_now_add=True)),
|
22 |
('text', models.CharField(default='In the quest for a sustainable future, renewable energy emerges as a beacon of hope', max_length=255)),
|
23 |
-
('speaker_wav', models.
|
24 |
-
('output_wav', models.
|
25 |
('language', models.CharField(default='en', max_length=2)),
|
26 |
('created_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(class)s_created_by', to=settings.AUTH_USER_MODEL)),
|
27 |
],
|
|
|
1 |
+
# Generated by Django 5.0.1 on 2024-02-07 16:14
|
2 |
|
3 |
import django.db.models.deletion
|
4 |
from django.conf import settings
|
|
|
20 |
('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
|
21 |
('created_date', models.DateTimeField(auto_now_add=True)),
|
22 |
('text', models.CharField(default='In the quest for a sustainable future, renewable energy emerges as a beacon of hope', max_length=255)),
|
23 |
+
('speaker_wav', models.CharField(max_length=255)),
|
24 |
+
('output_wav', models.CharField(max_length=255)),
|
25 |
('language', models.CharField(default='en', max_length=2)),
|
26 |
('created_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(class)s_created_by', to=settings.AUTH_USER_MODEL)),
|
27 |
],
|
texttovoice/minio_utils.py
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# minio_utils.py
|
2 |
+
|
3 |
+
from minio import Minio
|
4 |
+
from django.conf import settings
|
5 |
+
|
6 |
+
def get_minio_client():
    """Build a MinIO client from the MINIO_* values in Django settings.

    Returns:
        A ``Minio`` instance configured with the endpoint, access key,
        secret key and secure flag taken from ``django.conf.settings``.
    """
    endpoint = settings.MINIO_ENDPOINT
    credentials = {
        "access_key": settings.MINIO_ACCESS_KEY,
        "secret_key": settings.MINIO_SECRET_KEY,
    }
    return Minio(endpoint, secure=settings.MINIO_SECURE, **credentials)
|
texttovoice/models.py
CHANGED
@@ -17,8 +17,8 @@ class TextToSpeech(BaseModel):
|
|
17 |
max_length=255,
|
18 |
default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope"
|
19 |
)
|
20 |
-
speaker_wav = models.
|
21 |
-
output_wav = models.
|
22 |
language = models.CharField(
|
23 |
max_length=2, # Adjust the max length based on your language code requirements
|
24 |
default="en"
|
|
|
17 |
max_length=255,
|
18 |
default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope"
|
19 |
)
|
20 |
+
speaker_wav = models.CharField( max_length=255)
|
21 |
+
output_wav = models.CharField( max_length=255)
|
22 |
language = models.CharField(
|
23 |
max_length=2, # Adjust the max length based on your language code requirements
|
24 |
default="en"
|
texttovoice/serializers.py
CHANGED
@@ -1,6 +1,39 @@
|
|
1 |
from rest_framework import serializers
|
|
|
2 |
|
3 |
class TextToSpeechSerializer(serializers.Serializer):
|
4 |
text = serializers.CharField(default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope")
|
5 |
speaker_wav = serializers.FileField()
|
6 |
language = serializers.CharField(default="en")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
from rest_framework import serializers
|
2 |
+
from .models import TextToSpeech
|
3 |
|
4 |
class TextToSpeechSerializer(serializers.Serializer):
    """Validate the input fields of a speech-generation request."""

    # Text to synthesize; the sample sentence is used when the field is omitted.
    text = serializers.CharField(
        default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope",
    )
    # Uploaded speaker audio file (required).
    speaker_wav = serializers.FileField()
    # Language code; falls back to "en" when omitted.
    language = serializers.CharField(
        default="en",
    )
|
8 |
+
|
9 |
+
|
10 |
+
class TextToSpeechSerializerResponse(serializers.ModelSerializer):
    """Serialize a TextToSpeech row, exposing its creator as a username."""

    created_by = serializers.SerializerMethodField()

    class Meta:
        model = TextToSpeech
        # Every field that should appear in the response must be listed here.
        fields = [
            'id',
            'text',
            'speaker_wav',
            'output_wav',
            'language',
            'created_by',
        ]

    def get_created_by(self, obj):
        """Return the creator's username, or None when no creator is set."""
        if not obj.created_by:
            return None
        return obj.created_by.username
|
20 |
+
|
21 |
+
class TextToSpeechSerializerResponseWithURL(serializers.ModelSerializer):
    """Serialize a TextToSpeech row with pre-signed URLs instead of stored paths.

    The serializer context must contain a ``'view'`` entry whose object
    provides ``generate_presigned_url(object_path)``.
    """

    speaker_wav_url = serializers.SerializerMethodField()
    output_wav_url = serializers.SerializerMethodField()
    created_by = serializers.SerializerMethodField()

    class Meta:
        model = TextToSpeech
        # Add or remove fields as necessary.
        fields = [
            'id',
            'text',
            'speaker_wav_url',
            'output_wav_url',
            'language',
            'created_by',
        ]

    def _presign(self, stored_path):
        """Delegate URL generation to the view supplied in the context."""
        owning_view = self.context['view']
        return owning_view.generate_presigned_url(stored_path)

    def get_speaker_wav_url(self, obj):
        """Return the view-generated URL for the stored speaker sample path."""
        return self._presign(obj.speaker_wav)

    def get_output_wav_url(self, obj):
        """Return the view-generated URL for the stored output audio path."""
        return self._presign(obj.output_wav)

    def get_created_by(self, obj):
        """Return the creator's username, or None when no creator is set."""
        if not obj.created_by:
            return None
        return obj.created_by.username
|
texttovoice/urls.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from django.urls import path
|
2 |
+
from . import views
|
3 |
+
|
4 |
+
urlpatterns = [
|
5 |
+
path('generate-speech/', views.TextToSpeechCreateView.as_view(), name='Generate speech'),
|
6 |
+
path('current_user_speech', views.TextToSpeechListView.as_view(), name="Get all speech of Curent User"),
|
7 |
+
]
|
8 |
+
|
texttovoice/views.py
CHANGED
@@ -1,106 +1,151 @@
|
|
|
|
1 |
import os
|
2 |
import uuid
|
3 |
-
import
|
4 |
-
import logging # Import the logging module
|
5 |
import torch
|
6 |
-
from django.http import FileResponse
|
7 |
from rest_framework import status
|
8 |
from rest_framework.response import Response
|
9 |
-
from rest_framework.generics import CreateAPIView
|
10 |
-
from TTS.api import TTS
|
11 |
from rest_framework.authentication import TokenAuthentication
|
12 |
from rest_framework.permissions import IsAuthenticated
|
13 |
from texttovoice.models import TextToSpeech
|
14 |
-
from .serializers import TextToSpeechSerializer
|
15 |
from rest_framework.parsers import MultiPartParser
|
16 |
-
from drf_yasg import openapi
|
17 |
from drf_yasg.utils import swagger_auto_schema
|
|
|
|
|
|
|
18 |
|
|
|
19 |
|
20 |
-
# Initialize logger at module level
|
21 |
logger = logging.getLogger(__name__)
|
|
|
22 |
|
23 |
class TextToSpeechCreateView(CreateAPIView):
|
24 |
serializer_class = TextToSpeechSerializer
|
25 |
-
authentication_classes = [TokenAuthentication]
|
26 |
-
permission_classes = [IsAuthenticated]
|
27 |
-
|
28 |
parser_classes = [MultiPartParser]
|
29 |
|
30 |
@swagger_auto_schema(
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
'
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
)
|
48 |
-
|
49 |
def create(self, request, *args, **kwargs):
|
50 |
serializer = self.get_serializer(data=request.data)
|
51 |
if serializer.is_valid():
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
98 |
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
return response
|
103 |
|
104 |
-
|
105 |
-
|
106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from datetime import timedelta
|
2 |
import os
|
3 |
import uuid
|
4 |
+
import logging
|
|
|
5 |
import torch
|
|
|
6 |
from rest_framework import status
|
7 |
from rest_framework.response import Response
|
8 |
+
from rest_framework.generics import CreateAPIView,ListAPIView
|
9 |
+
from TTS.api import TTS # Ensure this import is correct based on your TTS library/package
|
10 |
from rest_framework.authentication import TokenAuthentication
|
11 |
from rest_framework.permissions import IsAuthenticated
|
12 |
from texttovoice.models import TextToSpeech
|
13 |
+
from .serializers import TextToSpeechSerializer, TextToSpeechSerializerResponse ,TextToSpeechSerializerResponseWithURL # Ensure this import matches your file structure
|
14 |
from rest_framework.parsers import MultiPartParser
|
|
|
15 |
from drf_yasg.utils import swagger_auto_schema
|
16 |
+
from drf_yasg import openapi
|
17 |
+
from rest_framework.exceptions import NotFound as NOT_FOUND
|
18 |
+
from .minio_utils import get_minio_client # Ensure this import matches your file structure
|
19 |
|
20 |
+
minio_client = get_minio_client()
|
21 |
|
|
|
22 |
logger = logging.getLogger(__name__)
|
23 |
+
BUCKET_NAME = "voice-clone"
|
24 |
|
25 |
class TextToSpeechCreateView(CreateAPIView):
    """Generate speech from text using an uploaded speaker sample.

    The multipart request is validated by ``TextToSpeechSerializer``
    (``text``, ``speaker_wav``, ``language``). The speaker sample and the
    generated audio are uploaded to MinIO, a ``TextToSpeech`` row is stored
    with their ``"bucket/object"`` paths, and the response returns the row
    with pre-signed download URLs in place of those paths.
    """

    serializer_class = TextToSpeechSerializer
    authentication_classes = [TokenAuthentication]
    permission_classes = [IsAuthenticated]
    parser_classes = [MultiPartParser]

    # The form parameters are derived from `serializer_class`; only the
    # operation text and the success response are declared here so the
    # generated docs describe this endpoint rather than an unrelated one.
    @swagger_auto_schema(
        operation_id='Generate speech',
        operation_description=(
            'Generate speech for the given text and language using the uploaded '
            'speaker_wav file, store both audio files and return the created record'
        ),
        responses={status.HTTP_201_CREATED: TextToSpeechSerializerResponse},
    )
    def create(self, request, *args, **kwargs):
        """Validate the request, run TTS, upload the audio and store the record.

        Returns:
            201 with the serialized record (``speaker_wav`` / ``output_wav``
            replaced by pre-signed URLs), 400 with the serializer errors when
            validation fails, or 500 with a generic error message when
            processing fails.
        """
        serializer = self.get_serializer(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        text = serializer.validated_data.get("text")
        speaker_wav = serializer.validated_data.get("speaker_wav")
        language = serializer.validated_data.get("language")

        # Temporary file paths. They are bound before the `try` so the
        # `finally` clause can always reference them, and the upload name is
        # reduced to its base name so it cannot point outside /tmp.
        speaker_file_path = os.path.join('/tmp', f"{uuid.uuid4()}{os.path.basename(speaker_wav.name)}")
        output_filename = os.path.join('/tmp', f"{uuid.uuid4()}.wav")

        try:
            gpu_available = torch.cuda.is_available()

            # Save the uploaded speaker sample to disk for the TTS engine.
            with open(speaker_file_path, "wb") as destination:
                for chunk in speaker_wav.chunks():
                    destination.write(chunk)

            # TTS processing.
            # NOTE(review): the model is instantiated on every request; consider
            # caching it if request latency becomes a problem.
            tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu_available)
            tts.tts_to_file(text=text, file_path=output_filename, speaker_wav=speaker_file_path, language=language)

            # Upload both files to MinIO.
            public_url, speaker_wav_path = self.upload_file_to_minio(speaker_file_path, 'speakers/')
            public_url_output, output_wav_path = self.upload_file_to_minio(output_filename, 'output/')

            # Create the DB entry; it stores the object paths, not the URLs.
            tts_instance = TextToSpeech.objects.create(
                text=text,
                speaker_wav=speaker_wav_path,
                output_wav=output_wav_path,
                language=language,
                created_by=request.user,
            )

            # Serialize the instance and substitute pre-signed URLs for the paths.
            response_serializer = TextToSpeechSerializerResponse(tts_instance)
            response_data = {
                **response_serializer.data,
                "speaker_wav": public_url,
                "output_wav": public_url_output,
            }
            return Response(response_data, status=status.HTTP_201_CREATED)
        except Exception as e:
            # Top-level boundary: log with traceback, answer with a generic 500.
            logger.exception("Error processing request: %s", e)
            return Response({"error": "An error occurred processing your request."}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
        finally:
            # Temporary files are removed on success and on failure alike.
            self.cleanup_files([speaker_file_path, output_filename])

    def upload_file_to_minio(self, file_path, prefix):
        """Upload a local file to the MinIO bucket.

        Args:
            file_path: Path of the local file to upload.
            prefix: Object-name prefix (e.g. ``'speakers/'``) put before the
                file's base name.

        Returns:
            A ``(pre_signed_url, object_path)`` tuple: a GET URL valid for one
            day, and the ``"<bucket>/<object_name>"`` path of the object.
        """
        file_name = os.path.basename(file_path)
        object_name = f"{prefix}{file_name}"
        with open(file_path, "rb") as file_data:
            minio_client.put_object(BUCKET_NAME, object_name, file_data, os.path.getsize(file_path))

        # Generate a pre-signed URL for the uploaded object.
        pre_signed_url = minio_client.presigned_get_object(BUCKET_NAME, object_name, expires=timedelta(days=1))
        return pre_signed_url, f"{BUCKET_NAME}/{object_name}"

    def cleanup_files(self, file_paths):
        """Remove the given files, best effort.

        A file that does not exist is skipped silently (it was never created
        because processing failed earlier); any other failure is logged and
        does not interrupt the remaining deletions.
        """
        for file_path in file_paths:
            try:
                os.remove(file_path)
            except FileNotFoundError:
                continue
            except Exception as e:
                logger.error("Failed to delete temporary file %s: %s", file_path, e)
|
120 |
+
|
121 |
+
|
122 |
+
|
123 |
+
|
124 |
+
class TextToSpeechListView(ListAPIView):
    """List the authenticated user's TextToSpeech records with pre-signed URLs."""

    serializer_class = TextToSpeechSerializerResponseWithURL
    authentication_classes = [TokenAuthentication]
    permission_classes = [IsAuthenticated]

    def get_queryset(self):
        """Return only the records created by the requesting user."""
        current_user = self.request.user
        return TextToSpeech.objects.filter(created_by=current_user)

    def list(self, request, *args, **kwargs):
        """Return all of the user's records, or raise NotFound when there are none."""
        user_records = self.get_queryset()

        if user_records.exists():
            # The serializer builds the pre-signed URLs through the view passed in its context.
            record_serializer = self.get_serializer(user_records, many=True, context={'view': self})
            return Response(record_serializer.data, status=status.HTTP_200_OK)

        raise NOT_FOUND('No text-to-speech data found for the current user.')

    def generate_presigned_url(self, object_path):
        """Return a one-hour pre-signed GET URL for a stored object path.

        ``object_path`` is expected in the form ``"bucket_name/object_name"``.
        Any failure (including a path without a slash) is logged and yields
        ``None``.
        """
        try:
            bucket_and_object = object_path.split('/', 1)
            bucket, object_name = bucket_and_object
            return minio_client.presigned_get_object(bucket, object_name, expires=timedelta(hours=1))
        except Exception as e:
            logger.error(f"Failed to generate presigned URL for {object_path}: {e}")
            return None
|