Spaces:

undetectable
/

voice-clone

Configuration error

App Files Community

renator committed on Feb 9, 2024

Commit

df4965a

1 Parent(s): 4b98fcb

update minio bucket

Browse files

Files changed (10) hide show

config/settings.py +7 -0
config/urls.py +1 -2
db.sqlite3 +0 -0
requirements.txt +1 -0
texttovoice/migrations/0001_initial.py +3 -3
texttovoice/minio_utils.py +12 -0
texttovoice/models.py +2 -2
texttovoice/serializers.py +33 -0
texttovoice/urls.py +8 -0
texttovoice/views.py +127 -82

config/settings.py CHANGED Viewed

@@ -181,6 +181,13 @@ STATIC_URL = 'static/'
 MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
 MEDIA_URL = '/media/'
 # Default primary key field type
 # https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field

 MEDIA_ROOT = os.path.join(BASE_DIR, 'media')
 MEDIA_URL = '/media/'
+# settings.py
+MINIO_ENDPOINT = '64.176.199.162:9000'
+MINIO_ACCESS_KEY = 'voice-clone'
+MINIO_SECRET_KEY = 'voice-clone'
+MINIO_SECURE = False  # Change to True if using HTTPS
 # Default primary key field type
 # https://docs.djangoproject.com/en/5.0/ref/settings/#default-auto-field

config/urls.py CHANGED Viewed

@@ -22,12 +22,11 @@ schema_view = get_schema_view(
 )
-from texttovoice.views import TextToSpeechCreateView
 urlpatterns = [
     path('admin/', admin.site.urls),
     path('auth/', include('accounts.urls')),
-    path('generate-speech/', TextToSpeechCreateView.as_view(), name='generate-speech-create'),
     path('', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
 ]

 )
 urlpatterns = [
     path('admin/', admin.site.urls),
     path('auth/', include('accounts.urls')),
+    path('speech/', include('texttovoice.urls')),
     path('', schema_view.with_ui('swagger', cache_timeout=0), name='schema-swagger-ui'),
 ]

db.sqlite3 CHANGED Viewed

Binary files a/db.sqlite3 and b/db.sqlite3 differ

requirements.txt CHANGED Viewed

@@ -13,6 +13,7 @@ Django
 djangorestframework
 drf-yasg
 django-cors-headers
 # fastapi==0.70.0
 # uvicorn==0.15.0

 djangorestframework
 drf-yasg
 django-cors-headers
+minio
 # fastapi==0.70.0
 # uvicorn==0.15.0

texttovoice/migrations/0001_initial.py CHANGED Viewed

@@ -1,4 +1,4 @@
-# Generated by Django 5.0.1 on 2024-01-25 15:38
 import django.db.models.deletion
 from django.conf import settings
@@ -20,8 +20,8 @@ class Migration(migrations.Migration):
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                 ('created_date', models.DateTimeField(auto_now_add=True)),
                 ('text', models.CharField(default='In the quest for a sustainable future, renewable energy emerges as a beacon of hope', max_length=255)),
-                ('speaker_wav', models.FileField(upload_to='speaker_wav/')),
-                ('output_wav', models.FileField(upload_to='output_wav/')),
                 ('language', models.CharField(default='en', max_length=2)),
                 ('created_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(class)s_created_by', to=settings.AUTH_USER_MODEL)),
             ],

+# Generated by Django 5.0.1 on 2024-02-07 16:14
 import django.db.models.deletion
 from django.conf import settings
                 ('id', models.BigAutoField(auto_created=True, primary_key=True, serialize=False, verbose_name='ID')),
                 ('created_date', models.DateTimeField(auto_now_add=True)),
                 ('text', models.CharField(default='In the quest for a sustainable future, renewable energy emerges as a beacon of hope', max_length=255)),
+                ('speaker_wav', models.CharField(max_length=255)),
+                ('output_wav', models.CharField(max_length=255)),
                 ('language', models.CharField(default='en', max_length=2)),
                 ('created_by', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='%(class)s_created_by', to=settings.AUTH_USER_MODEL)),
             ],

texttovoice/minio_utils.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# minio_utils.py
+from minio import Minio
+from django.conf import settings
+def get_minio_client():
+    return Minio(
+        settings.MINIO_ENDPOINT,
+        access_key=settings.MINIO_ACCESS_KEY,
+        secret_key=settings.MINIO_SECRET_KEY,
+        secure=settings.MINIO_SECURE
+    )

texttovoice/models.py CHANGED Viewed

@@ -17,8 +17,8 @@ class TextToSpeech(BaseModel):
         max_length=255,
         default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope"
     )
-    speaker_wav = models.FileField(upload_to='speaker_wav/')
-    output_wav = models.FileField(upload_to='output_wav/')
     language = models.CharField(
         max_length=2,  # Adjust the max length based on your language code requirements
         default="en"

         max_length=255,
         default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope"
     )
+    speaker_wav = models.CharField( max_length=255)
+    output_wav = models.CharField( max_length=255)
     language = models.CharField(
         max_length=2,  # Adjust the max length based on your language code requirements
         default="en"

texttovoice/serializers.py CHANGED Viewed

@@ -1,6 +1,39 @@
 from rest_framework import serializers
 class TextToSpeechSerializer(serializers.Serializer):
     text = serializers.CharField(default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope")
     speaker_wav = serializers.FileField()
     language = serializers.CharField(default="en")

 from rest_framework import serializers
+from .models import TextToSpeech
 class TextToSpeechSerializer(serializers.Serializer):
     text = serializers.CharField(default="In the quest for a sustainable future, renewable energy emerges as a beacon of hope")
     speaker_wav = serializers.FileField()
     language = serializers.CharField(default="en")
+class TextToSpeechSerializerResponse(serializers.ModelSerializer):
+    created_by = serializers.SerializerMethodField()
+    class Meta:
+        model = TextToSpeech
+        fields = ['id', 'text', 'speaker_wav', 'output_wav', 'language', 'created_by']
+        # Ensure that all the fields you want to include are listed here
+    def get_created_by(self, obj):
+        return obj.created_by.username if obj.created_by else None
+class TextToSpeechSerializerResponseWithURL(serializers.ModelSerializer):
+    speaker_wav_url = serializers.SerializerMethodField()
+    output_wav_url = serializers.SerializerMethodField()
+    created_by = serializers.SerializerMethodField()
+    class Meta:
+        model = TextToSpeech
+        fields = ['id', 'text', 'speaker_wav_url', 'output_wav_url', 'language', 'created_by']
+        # Add or remove fields as necessary
+    def get_speaker_wav_url(self, obj):
+        return self.context['view'].generate_presigned_url(obj.speaker_wav)
+    def get_output_wav_url(self, obj):
+        return self.context['view'].generate_presigned_url(obj.output_wav)
+    def get_created_by(self, obj):
+        return obj.created_by.username if obj.created_by else None

texttovoice/urls.py ADDED Viewed

	@@ -0,0 +1,8 @@

+from django.urls import path
+from . import views
+urlpatterns = [
+    path('generate-speech/', views.TextToSpeechCreateView.as_view(), name='Generate speech'),
+    path('current_user_speech', views.TextToSpeechListView.as_view(), name="Get all speech of Curent User"),
+]

texttovoice/views.py CHANGED Viewed

@@ -1,106 +1,151 @@
 import os
 import uuid
-import time
-import logging  # Import the logging module
 import torch
-from django.http import FileResponse
 from rest_framework import status
 from rest_framework.response import Response
-from rest_framework.generics import CreateAPIView
-from TTS.api import TTS
 from rest_framework.authentication import TokenAuthentication
 from rest_framework.permissions import IsAuthenticated
 from texttovoice.models import TextToSpeech
-from .serializers import TextToSpeechSerializer
 from rest_framework.parsers import MultiPartParser
-from drf_yasg import openapi
 from drf_yasg.utils import swagger_auto_schema
-# Initialize logger at module level
 logger = logging.getLogger(__name__)
 class TextToSpeechCreateView(CreateAPIView):
     serializer_class = TextToSpeechSerializer
-    authentication_classes = [TokenAuthentication]  # Apply token authentication
-    permission_classes = [IsAuthenticated]  # Require authentication for this view
     parser_classes = [MultiPartParser]
     @swagger_auto_schema(
-            operation_id='Create a document',
-            operation_description='Create a document by providing file and s3_key',
-            manual_parameters=[
-                openapi.Parameter('file', openapi.IN_FORM, type=openapi.TYPE_FILE, description='Document to be uploaded'),
-                openapi.Parameter('s3_key', openapi.IN_FORM, type=openapi.TYPE_STRING, description='S3 Key of the Document '
-                                                                                                   '(folders along with name)')
-            ],
-            responses={
-                status.HTTP_200_OK: openapi.Response(
-                    'Success', schema=openapi.Schema(type=openapi.TYPE_OBJECT, properties={
-                        'doc_id': openapi.Schema(type=openapi.TYPE_STRING, description='Document ID'),
-                        'mime_type': openapi.Schema(type=openapi.TYPE_STRING, description='Mime Type of the Document'),
-                        'version_id': openapi.Schema(type=openapi.TYPE_STRING, description='S3 version ID of the document')
-                    })
-                )
-            }
-        )
     def create(self, request, *args, **kwargs):
         serializer = self.get_serializer(data=request.data)
         if serializer.is_valid():
-            gpu_available = torch.cuda.is_available()
-            text = serializer.validated_data.get("text")
-            speaker_wav = serializer.validated_data.get("speaker_wav")
-            language = serializer.validated_data.get("language")
-            output_filename = f"output_{uuid.uuid4()}.wav"
-            # Log the start time
-            start_time = time.time()
-            # Save the uploaded speaker file to a temporary location
-            speaker_file_path = os.path.join('/tmp', speaker_wav.name)
-            with open(speaker_file_path, "wb") as destination:
-                for chunk in speaker_wav.chunks():
-                    destination.write(chunk)
-            # Generate speech using tts.tts_to_file
-            tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu_available)
-            tts.tts_to_file(text=text, file_path=output_filename, speaker_wav=speaker_file_path, language=language)
-            # Log the end time
-            end_time = time.time()
-            # Calculate the processing time
-            processing_time = end_time - start_time
-            # Define a function to delete the output file
-            def file_iterator(file_name):
-                with open(file_name, 'rb') as f:
-                    yield from f
-                # Delete the file after sending it
-                try:
-                    os.remove(file_name)
-                except Exception as e:
-                    # You might want to log this error
-                    pass
-            # Use the file_iterator to create a FileResponse
-            TextToSpeech.objects.create(
-            text=text,
-            speaker_wav=speaker_wav,
-            output_wav=output_filename,
-            language=language,
-            created_by=request.user  # Assign the authenticated user here
-            )
-            response = FileResponse(file_iterator(output_filename), as_attachment=True, content_type='audio/wav')
-            # Log the processing time using the logger
-            logger.info(f"start time: {start_time} , end time: {end_time} and Processing time: {processing_time} seconds")
-            return response
-            # except Exception as e:
-            #     return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
-        return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

+from datetime import timedelta
 import os
 import uuid
+import logging
 import torch
 from rest_framework import status
 from rest_framework.response import Response
+from rest_framework.generics import CreateAPIView,ListAPIView
+from TTS.api import TTS  # Ensure this import is correct based on your TTS library/package
 from rest_framework.authentication import TokenAuthentication
 from rest_framework.permissions import IsAuthenticated
 from texttovoice.models import TextToSpeech
+from .serializers import TextToSpeechSerializer, TextToSpeechSerializerResponse ,TextToSpeechSerializerResponseWithURL # Ensure this import matches your file structure
 from rest_framework.parsers import MultiPartParser
 from drf_yasg.utils import swagger_auto_schema
+from drf_yasg import openapi
+from rest_framework.exceptions import NotFound as NOT_FOUND
+from .minio_utils import get_minio_client  # Ensure this import matches your file structure
+minio_client = get_minio_client()
 logger = logging.getLogger(__name__)
+BUCKET_NAME = "voice-clone"
 class TextToSpeechCreateView(CreateAPIView):
     serializer_class = TextToSpeechSerializer
+    authentication_classes = [TokenAuthentication]
+    permission_classes = [IsAuthenticated]
     parser_classes = [MultiPartParser]
     @swagger_auto_schema(
+        operation_id='Create a document',
+        operation_description='Create a document by providing file and s3_key',
+        manual_parameters=[
+            openapi.Parameter('file', openapi.IN_FORM, type=openapi.TYPE_FILE, description='Document to be uploaded'),
+            openapi.Parameter('s3_key', openapi.IN_FORM, type=openapi.TYPE_STRING, description='S3 Key of the Document (folders along with name)')
+        ],
+        responses={
+            status.HTTP_200_OK: openapi.Response(
+                'Success', schema=openapi.Schema(type=openapi.TYPE_OBJECT, properties={
+                    'doc_id': openapi.Schema(type=openapi.TYPE_STRING, description='Document ID'),
+                    'mime_type': openapi.Schema(type=openapi.TYPE_STRING, description='Mime Type of the Document'),
+                    'version_id': openapi.Schema(type=openapi.TYPE_STRING, description='S3 version ID of the document')
+                })
+            )
+        }
+    )
     def create(self, request, *args, **kwargs):
         serializer = self.get_serializer(data=request.data)
         if serializer.is_valid():
+            try:
+                gpu_available = torch.cuda.is_available()
+                text = serializer.validated_data.get("text")
+                speaker_wav = serializer.validated_data.get("speaker_wav")
+                language = serializer.validated_data.get("language")
+                # Temporary file paths
+                speaker_file_path = os.path.join('/tmp', f"{uuid.uuid4()}{speaker_wav.name}")
+                output_filename = os.path.join('/tmp', f"{uuid.uuid4()}.wav")
+                # Save speaker WAV file
+                with open(speaker_file_path, "wb") as destination:
+                    for chunk in speaker_wav.chunks():
+                        destination.write(chunk)
+                # TTS processing
+                tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu_available)
+                tts.tts_to_file(text=text, file_path=output_filename, speaker_wav=speaker_file_path, language=language)
+                # Upload files to MinIO and cleanup
+                public_url,speaker_wav_path  = self.upload_file_to_minio(speaker_file_path, 'speakers/')
+                public_url_output ,output_wav_path = self.upload_file_to_minio(output_filename, 'output/')
+                # Create DB entry
+                tts_instance = TextToSpeech.objects.create(
+                    text=text,
+                    speaker_wav=speaker_wav_path,
+                    output_wav=output_wav_path,
+                    language=language,
+                    created_by=request.user
+                )
+                # Serialize and return the created instance
+                response_serializer = TextToSpeechSerializerResponse(tts_instance)
+                response_data = {
+                    **response_serializer.data,
+                    "speaker_wav": public_url,
+                    "output_wav": public_url_output
+                }
+                return Response(response_data, status=status.HTTP_201_CREATED)
+            except Exception as e:
+                logger.error(f"Error processing request: {str(e)}")
+                return Response({"error": "An error occurred processing your request."}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
+            finally:
+                # Ensure cleanup happens
+                self.cleanup_files([speaker_file_path, output_filename])
+        else:
+            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)
+    def upload_file_to_minio(self, file_path, prefix):
+        """Uploads a file to MinIO and returns a tuple: (pre-signed GET URL valid for 1 day, "bucket_name/object_name" path)."""
+        file_name = os.path.basename(file_path)
+        object_name = f"{prefix}{file_name}"
+        with open(file_path, "rb") as file_data:
+            minio_client.put_object(BUCKET_NAME, object_name, file_data, os.path.getsize(file_path))
+        # Generate a pre-signed URL for the uploaded object
+        pre_signed_url = minio_client.presigned_get_object(BUCKET_NAME, object_name, expires=timedelta(days=1))
+        return pre_signed_url ,f"{BUCKET_NAME}/{object_name}"
+    def cleanup_files(self, file_paths):
+        """Removes files from the filesystem."""
+        for file_path in file_paths:
+            try:
+                os.remove(file_path)
+            except Exception as e:
+                logger.error(f"Failed to delete temporary file {file_path}: {e}")
+class TextToSpeechListView(ListAPIView):
+    serializer_class = TextToSpeechSerializerResponseWithURL
+    authentication_classes = [TokenAuthentication]
+    permission_classes = [IsAuthenticated]
+    def get_queryset(self):
+        return TextToSpeech.objects.filter(created_by=self.request.user)
+    def list(self, request, *args, **kwargs):
+        queryset = self.get_queryset()
+        if not queryset.exists():
+            raise NOT_FOUND('No text-to-speech data found for the current user.')
+        # Directly serialize the data, pre-signed URLs are handled by the serializer
+        serializer = self.get_serializer(queryset, many=True, context={'view': self})
+        return Response(serializer.data, status=status.HTTP_200_OK)
+    def generate_presigned_url(self, object_path):
+        # Ensure this logic correctly splits your `object_path` to get the bucket name and object name
+        # This example assumes `object_path` is in the format "bucket_name/object_name"
+        try:
+            bucket, object_name = object_path.split('/', 1)
+            presigned_url = minio_client.presigned_get_object(bucket, object_name, expires=timedelta(hours=1))
+            return presigned_url
+        except Exception as e:
+            logger.error(f"Failed to generate presigned URL for {object_path}: {e}")
+            return None