renator committed on
Commit
df4965a
·
1 Parent(s): 4b98fcb

update minio bucket

Browse files
config/settings.py CHANGED
@@ -181,6 +181,13 @@ STATIC_URL = 'static/'
181
  MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
182
  MEDIA_URL = '/media/'
183
 
 
 
 
 
 
 
 
184
  # Default primary key field type
185
  # https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
186
 
 
181
  MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
182
  MEDIA_URL = '/media/'
183
 
184
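+ # MinIO object-storage connection settings. NOTE(review): the endpoint and credentials below are hardcoded; prefer loading them from environment variables.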
185
+
186
+ MINIO_ENDPOINT = '64.176.199.162:9000'
187
+ MINIO_ACCESS_KEY = 'voice-clone'
188
+ MINIO_SECRET_KEY = 'voice-clone'
189
+ MINIO_SECURE = False # Change to True if using HTTPS
190
+
191
  # Default primary key field type
192
  # https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field
193
 
config/urls.py CHANGED
@@ -22,12 +22,11 @@ schema_view = get_schema_view(
22
  )
23
 
24
 
25
- from texttovoice.views import TextToSpeechCreateView
26
 
27
  urlpatterns = [
28
  path('admin/', admin.site.urls),
29
  path('auth/', include('accounts.urls')),
30
- path('generate-speech/', TextToSpeechCreateView.as_view(), name='generate-speech-create'),
31
  path('', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
32
 
33
  ]
 
22
  )
23
 
24
 
 
25
 
26
  urlpatterns = [
27
  path('admin/', admin.site.urls),
28
  path('auth/', include('accounts.urls')),
29
+ path('speech/', include('texttovoice.urls')),
30
  path('', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
31
 
32
  ]
db.sqlite3 CHANGED
Binary files a/db.sqlite3 and b/db.sqlite3 differ
 
requirements.txt CHANGED
@@ -13,6 +13,7 @@ Django
13
  djangorestframework
14
  drf-yasg
15
  django-cors-headers
 
16
 
17
  # fastapi==0.70.0
18
  # uvicorn==0.15.0
 
13
  djangorestframework
14
  drf-yasg
15
  django-cors-headers
16
+ minio
17
 
18
  # fastapi==0.70.0
19
  # uvicorn==0.15.0
texttovoice/migrations/0001_initial.py CHANGED
@@ -1,4 +1,4 @@
1
- # Generated by Django 5.0.1 on 2024-01-25 15:38
2
 
3
  import django.db.models.deletion
4
  from django.conf import settings
@@ -20,8 +20,8 @@ class Migration(migrations.Migration):
20
  ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
21
  ('created_date', models.DateTimeField(auto_now_add=True)),
22
  ('text', models.CharField(default='In the quest for a sustainable future, renewable energy emerges as a beacon of hope', max_length=255)),
23
- ('speaker_wav', models.FileField(upload_to='speaker_wav/')),
24
- ('output_wav', models.FileField(upload_to='output_wav/')),
25
  ('language', models.CharField(default='en', max_length=2)),
26
  ('created_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(class)s_created_by', to=settings.AUTH_USER_MODEL)),
27
  ],
 
1
+ # Generated by Django 5.0.1 on 2024-02-07 16:14
2
 
3
  import django.db.models.deletion
4
  from django.conf import settings
 
20
  ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
21
  ('created_date', models.DateTimeField(auto_now_add=True)),
22
  ('text', models.CharField(default='In the quest for a sustainable future, renewable energy emerges as a beacon of hope', max_length=255)),
23
+ ('speaker_wav', models.CharField(max_length=255)),
24
+ ('output_wav', models.CharField(max_length=255)),
25
  ('language', models.CharField(default='en', max_length=2)),
26
  ('created_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(class)s_created_by', to=settings.AUTH_USER_MODEL)),
27
  ],
texttovoice/minio_utils.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # minio_utils.py
2
+
3
+ from minio import Minio
4
+ from django.conf import settings
5
+
6
+ def get_minio_client():
7
+ return Minio(
8
+ settings.MINIO_ENDPOINT,
9
+ access_key=settings.MINIO_ACCESS_KEY,
10
+ secret_key=settings.MINIO_SECRET_KEY,
11
+ secure=settings.MINIO_SECURE
12
+ )
texttovoice/models.py CHANGED
@@ -17,8 +17,8 @@ class TextToSpeech(BaseModel):
17
  max_length=255,
18
  default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope"
19
  )
20
- speaker_wav = models.FileField(upload_to='speaker_wav/')
21
- output_wav = models.FileField(upload_to='output_wav/')
22
  language = models.CharField(
23
  max_length=2, # Adjust the max length based on your language code requirements
24
  default="en"
 
17
  max_length=255,
18
  default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope"
19
  )
20
+ speaker_wav = models.CharField( max_length=255)
21
+ output_wav = models.CharField( max_length=255)
22
  language = models.CharField(
23
  max_length=2, # Adjust the max length based on your language code requirements
24
  default="en"
texttovoice/serializers.py CHANGED
@@ -1,6 +1,39 @@
1
  from rest_framework import serializers
 
2
 
3
  class TextToSpeechSerializer(serializers.Serializer):
4
  text = serializers.CharField(default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope")
5
  speaker_wav = serializers.FileField()
6
  language = serializers.CharField(default="en")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from rest_framework import serializers
2
+ from .models import TextToSpeech
3
 
4
  class TextToSpeechSerializer(serializers.Serializer):
5
  text = serializers.CharField(default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope")
6
  speaker_wav = serializers.FileField()
7
  language = serializers.CharField(default="en")
8
+
9
+
10
+ class TextToSpeechSerializerResponse(serializers.ModelSerializer):
11
+ created_by = serializers.SerializerMethodField()
12
+
13
+ class Meta:
14
+ model = TextToSpeech
15
+ fields = ['id', 'text', 'speaker_wav', 'output_wav', 'language', 'created_by']
16
+ # Ensure that all the fields you want to include are listed here
17
+
18
+ def get_created_by(self, obj):
19
+ return obj.created_by.username if obj.created_by else None
20
+
21
+ class TextToSpeechSerializerResponseWithURL(serializers.ModelSerializer):
22
+ speaker_wav_url = serializers.SerializerMethodField()
23
+ output_wav_url = serializers.SerializerMethodField()
24
+ created_by = serializers.SerializerMethodField()
25
+
26
+
27
+ class Meta:
28
+ model = TextToSpeech
29
+ fields = ['id', 'text', 'speaker_wav_url', 'output_wav_url', 'language', 'created_by']
30
+ # Add or remove fields as necessary
31
+
32
+ def get_speaker_wav_url(self, obj):
33
+ return self.context['view'].generate_presigned_url(obj.speaker_wav)
34
+
35
+ def get_output_wav_url(self, obj):
36
+ return self.context['view'].generate_presigned_url(obj.output_wav)
37
+
38
+ def get_created_by(self, obj):
39
+ return obj.created_by.username if obj.created_by else None
texttovoice/urls.py ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ from django.urls import path
2
+ from . import views
3
+
4
+ urlpatterns = [
5
+ path('generate-speech/', views.TextToSpeechCreateView.as_view(), name='Generate speech'),
6
+ path('current_user_speech', views.TextToSpeechListView.as_view(), name="Get all speech of Curent User"),
7
+ ]
8
+
texttovoice/views.py CHANGED
@@ -1,106 +1,151 @@
 
1
  import os
2
  import uuid
3
- import time
4
- import logging # Import the logging module
5
  import torch
6
- from django.http import FileResponse
7
  from rest_framework import status
8
  from rest_framework.response import Response
9
- from rest_framework.generics import CreateAPIView
10
- from TTS.api import TTS
11
  from rest_framework.authentication import TokenAuthentication
12
  from rest_framework.permissions import IsAuthenticated
13
  from texttovoice.models import TextToSpeech
14
- from .serializers import TextToSpeechSerializer
15
  from rest_framework.parsers import MultiPartParser
16
- from drf_yasg import openapi
17
  from drf_yasg.utils import swagger_auto_schema
 
 
 
18
 
 
19
 
20
- # Initialize logger at module level
21
  logger = logging.getLogger(__name__)
 
22
 
23
  class TextToSpeechCreateView(CreateAPIView):
24
  serializer_class = TextToSpeechSerializer
25
- authentication_classes = [TokenAuthentication] # Apply token authentication
26
- permission_classes = [IsAuthenticated] # Require authentication for this view
27
-
28
  parser_classes = [MultiPartParser]
29
 
30
  @swagger_auto_schema(
31
- operation_id='Create a document',
32
- operation_description='Create a document by providing file and s3_key',
33
- manual_parameters=[
34
- openapi.Parameter('file', openapi.IN_FORM, type=openapi.TYPE_FILE, description='Document to be uploaded'),
35
- openapi.Parameter('s3_key', openapi.IN_FORM, type=openapi.TYPE_STRING, description='S3 Key of the Document '
36
- '(folders along with name)')
37
- ],
38
- responses={
39
- status.HTTP_200_OK: openapi.Response(
40
- 'Success', schema=openapi.Schema(type=openapi.TYPE_OBJECT, properties={
41
- 'doc_id': openapi.Schema(type=openapi.TYPE_STRING, description='Document ID'),
42
- 'mime_type': openapi.Schema(type=openapi.TYPE_STRING, description='Mime Type of the Document'),
43
- 'version_id': openapi.Schema(type=openapi.TYPE_STRING, description='S3 version ID of the document')
44
- })
45
- )
46
- }
47
- )
48
-
49
  def create(self, request, *args, **kwargs):
50
  serializer = self.get_serializer(data=request.data)
51
  if serializer.is_valid():
52
- gpu_available = torch.cuda.is_available()
53
- text = serializer.validated_data.get("text")
54
- speaker_wav = serializer.validated_data.get("speaker_wav")
55
- language = serializer.validated_data.get("language")
56
- output_filename = f"output_{uuid.uuid4()}.wav"
57
-
58
- # Log the start time
59
- start_time = time.time()
60
- # Save the uploaded speaker file to a temporary location
61
- speaker_file_path = os.path.join('/tmp', speaker_wav.name)
62
- with open(speaker_file_path, "wb") as destination:
63
- for chunk in speaker_wav.chunks():
64
- destination.write(chunk)
65
-
66
- # Generate speech using tts.tts_to_file
67
- tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu_available)
68
- tts.tts_to_file(text=text, file_path=output_filename, speaker_wav=speaker_file_path, language=language)
69
-
70
- # Log the end time
71
- end_time = time.time()
72
-
73
- # Calculate the processing time
74
- processing_time = end_time - start_time
75
-
76
- # Define a function to delete the output file
77
- def file_iterator(file_name):
78
- with open(file_name, 'rb') as f:
79
- yield from f
80
-
81
- # Delete the file after sending it
82
- try:
83
- os.remove(file_name)
84
- except Exception as e:
85
- # You might want to log this error
86
- pass
87
-
88
- # Use the file_iterator to create a FileResponse
89
-
90
- TextToSpeech.objects.create(
91
- text=text,
92
- speaker_wav=speaker_wav,
93
- output_wav=output_filename,
94
- language=language,
95
- created_by=request.user # Assign the authenticated user here
96
- )
97
- response = FileResponse(file_iterator(output_filename), as_attachment=True, content_type='audio/wav')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
 
99
- # Log the processing time using the logger
100
- logger.info(f"start time: {start_time} , end time: {end_time} and Processing time: {processing_time} seconds")
101
-
102
- return response
103
 
104
- # except Exception as e:
105
- # return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
106
- return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
 
 
 
 
 
 
 
 
1
+ from datetime import timedelta
2
  import os
3
  import uuid
4
+ import logging
 
5
  import torch
 
6
  from rest_framework import status
7
  from rest_framework.response import Response
8
+ from rest_framework.generics import CreateAPIView,ListAPIView
9
+ from TTS.api import TTS # Ensure this import is correct based on your TTS library/package
10
  from rest_framework.authentication import TokenAuthentication
11
  from rest_framework.permissions import IsAuthenticated
12
  from texttovoice.models import TextToSpeech
13
+ from .serializers import TextToSpeechSerializer, TextToSpeechSerializerResponse ,TextToSpeechSerializerResponseWithURL # Ensure this import matches your file structure
14
  from rest_framework.parsers import MultiPartParser
 
15
  from drf_yasg.utils import swagger_auto_schema
16
+ from drf_yasg import openapi
17
+ from rest_framework.exceptions import NotFound as NOT_FOUND
18
+ from .minio_utils import get_minio_client # Ensure this import matches your file structure
19
 
20
+ minio_client = get_minio_client()
21
 
 
22
  logger = logging.getLogger(__name__)
23
+ BUCKET_NAME = "voice-clone"
24
 
25
  class TextToSpeechCreateView(CreateAPIView):
26
  serializer_class = TextToSpeechSerializer
27
+ authentication_classes = [TokenAuthentication]
28
+ permission_classes = [IsAuthenticated]
 
29
  parser_classes = [MultiPartParser]
30
 
31
  @swagger_auto_schema(
32
+ operation_id='Create a document',
33
+ operation_description='Create a document by providing file and s3_key',
34
+ manual_parameters=[
35
+ openapi.Parameter('file', openapi.IN_FORM, type=openapi.TYPE_FILE, description='Document to be uploaded'),
36
+ openapi.Parameter('s3_key', openapi.IN_FORM, type=openapi.TYPE_STRING, description='S3 Key of the Document (folders along with name)')
37
+ ],
38
+ responses={
39
+ status.HTTP_200_OK: openapi.Response(
40
+ 'Success', schema=openapi.Schema(type=openapi.TYPE_OBJECT, properties={
41
+ 'doc_id': openapi.Schema(type=openapi.TYPE_STRING, description='Document ID'),
42
+ 'mime_type': openapi.Schema(type=openapi.TYPE_STRING, description='Mime Type of the Document'),
43
+ 'version_id': openapi.Schema(type=openapi.TYPE_STRING, description='S3 version ID of the document')
44
+ })
45
+ )
46
+ }
47
+ )
 
 
48
  def create(self, request, *args, **kwargs):
49
  serializer = self.get_serializer(data=request.data)
50
  if serializer.is_valid():
51
+ try:
52
+ gpu_available = torch.cuda.is_available()
53
+ text = serializer.validated_data.get("text")
54
+ speaker_wav = serializer.validated_data.get("speaker_wav")
55
+ language = serializer.validated_data.get("language")
56
+
57
+ # Temporary file paths
58
+ speaker_file_path = os.path.join('/tmp', f"{uuid.uuid4()}{speaker_wav.name}")
59
+ output_filename = os.path.join('/tmp', f"{uuid.uuid4()}.wav")
60
+
61
+ # Save speaker WAV file
62
+ with open(speaker_file_path, "wb") as destination:
63
+ for chunk in speaker_wav.chunks():
64
+ destination.write(chunk)
65
+
66
+ # TTS processing
67
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu_available)
68
+ tts.tts_to_file(text=text, file_path=output_filename, speaker_wav=speaker_file_path, language=language)
69
+
70
+ # Upload files to MinIO and cleanup
71
+ public_url,speaker_wav_path = self.upload_file_to_minio(speaker_file_path, 'speakers/')
72
+ public_url_output ,output_wav_path = self.upload_file_to_minio(output_filename, 'output/')
73
+
74
+ # Create DB entry
75
+ tts_instance = TextToSpeech.objects.create(
76
+ text=text,
77
+ speaker_wav=speaker_wav_path,
78
+ output_wav=output_wav_path,
79
+ language=language,
80
+ created_by=request.user
81
+ )
82
+
83
+ # Serialize and return the created instance
84
+ response_serializer = TextToSpeechSerializerResponse(tts_instance)
85
+ response_data = {
86
+ **response_serializer.data,
87
+ "speaker_wav": public_url,
88
+ "output_wav": public_url_output
89
+ }
90
+
91
+ return Response(response_data, status=status.HTTP_201_CREATED)
92
+ except Exception as e:
93
+ logger.error(f"Error processing request: {str(e)}")
94
+ return Response({"error": "An error occurred processing your request."}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
95
+ finally:
96
+ # Ensure cleanup happens
97
+ self.cleanup_files([speaker_file_path, output_filename])
98
+ else:
99
+ return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
100
+
101
+ def upload_file_to_minio(self, file_path, prefix):
102
+ """Uploads a file to MinIO and returns a tuple of (pre-signed URL valid for one day, stored "bucket/object_name" path)."""
103
+ file_name = os.path.basename(file_path)
104
+ object_name = f"{prefix}{file_name}"
105
+ with open(file_path, "rb") as file_data:
106
+ minio_client.put_object(BUCKET_NAME, object_name, file_data, os.path.getsize(file_path))
107
+
108
+ # Generate a pre-signed URL for the uploaded object
109
+ pre_signed_url = minio_client.presigned_get_object(BUCKET_NAME, object_name, expires=timedelta(days=1))
110
+ return pre_signed_url ,f"{BUCKET_NAME}/{object_name}"
111
+
112
+
113
+ def cleanup_files(self, file_paths):
114
+ """Removes files from the filesystem."""
115
+ for file_path in file_paths:
116
+ try:
117
+ os.remove(file_path)
118
+ except Exception as e:
119
+ logger.error(f"Failed to delete temporary file {file_path}: {e}")
120
+
121
+
122
+
123
+
124
+ class TextToSpeechListView(ListAPIView):
125
+ serializer_class = TextToSpeechSerializerResponseWithURL
126
+ authentication_classes = [TokenAuthentication]
127
+ permission_classes = [IsAuthenticated]
128
+
129
+ def get_queryset(self):
130
+ return TextToSpeech.objects.filter(created_by=self.request.user)
131
+
132
+ def list(self, request, *args, **kwargs):
133
+ queryset = self.get_queryset()
134
+
135
+ if not queryset.exists():
136
+ raise NOT_FOUND('No text-to-speech data found for the current user.')
137
 
138
+ # Directly serialize the data, pre-signed URLs are handled by the serializer
139
+ serializer = self.get_serializer(queryset, many=True, context={'view': self})
140
+ return Response(serializer.data, status=status.HTTP_200_OK)
 
141
 
142
+ def generate_presigned_url(self, object_path):
143
+ # Ensure this logic correctly splits your `object_path` to get the bucket name and object name
144
+ # This example assumes `object_path` is in the format "bucket_name/object_name"
145
+ try:
146
+ bucket, object_name = object_path.split('/', 1)
147
+ presigned_url = minio_client.presigned_get_object(bucket, object_name, expires=timedelta(hours=1))
148
+ return presigned_url
149
+ except Exception as e:
150
+ logger.error(f"Failed to generate presigned URL for {object_path}: {e}")
151
+ return None