File size: 4,705 Bytes
2d610a5
 
e0491f4
 
 
2d610a5
 
 
 
 
52a3fd6
 
4b98fcb
2d610a5
6dc35f3
 
 
 
2d610a5
e0491f4
 
 
2d610a5
 
23200db
 
2d610a5
6dc35f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d610a5
 
 
e0491f4
2d610a5
 
 
 
 
e0491f4
 
6c85678
f40b874
6c85678
 
 
 
 
e0491f4
23200db
2d610a5
e0491f4
 
 
 
 
 
6c85678
 
 
 
2d610a5
6c85678
 
 
 
 
 
2d610a5
6c85678
f40b874
4b98fcb
 
 
 
 
 
 
 
e0491f4
 
 
 
6c85678
2d610a5
6c85678
 
2d610a5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import logging
import os
import tempfile
import time
import uuid

import torch
from django.http import FileResponse
from drf_yasg import openapi
from drf_yasg.utils import swagger_auto_schema
from rest_framework import status
from rest_framework.authentication import TokenAuthentication
from rest_framework.generics import CreateAPIView
from rest_framework.parsers import MultiPartParser
from rest_framework.permissions import IsAuthenticated
from rest_framework.response import Response
from TTS.api import TTS

from texttovoice.models import TextToSpeech

from .serializers import TextToSpeechSerializer


# Module-level logger named after this module; the view below uses it to
# report how long each synthesis request took.
logger = logging.getLogger(__name__)

class TextToSpeechCreateView(CreateAPIView):
    """Synthesize speech from text in the voice of an uploaded speaker sample.

    Accepts a multipart form validated by ``TextToSpeechSerializer`` (the view
    reads the ``text``, ``speaker_wav`` and ``language`` fields), runs the TTS
    model, records the request in ``TextToSpeech`` and streams the generated
    WAV file back as an attachment. Only token-authenticated users may call it.
    """

    serializer_class = TextToSpeechSerializer
    authentication_classes = [TokenAuthentication]  # Apply token authentication
    permission_classes = [IsAuthenticated]  # Require authentication for this view

    parser_classes = [MultiPartParser]

    # Identifier of the TTS model used for synthesis.
    tts_model_name = "tts_models/multilingual/multi-dataset/xtts_v2"
    # Number of bytes read per chunk while streaming the generated audio.
    stream_chunk_size = 64 * 1024

    @staticmethod
    def _remove_file(path):
        """Delete ``path``, tolerating a missing file and logging other failures."""
        try:
            os.remove(path)
        except FileNotFoundError:
            # Nothing to clean up (e.g. synthesis failed before writing).
            pass
        except OSError:
            # Best-effort cleanup: never let a failed delete break the request.
            logger.warning("Could not remove file %s", path, exc_info=True)

    @staticmethod
    def _save_speaker_sample(speaker_wav):
        """Write the uploaded speaker sample to a unique temp file; return its path.

        A unique name avoids collisions between concurrent uploads that share
        a client-side filename. The original extension is kept in case the
        audio loader relies on it.
        """
        extension = os.path.splitext(os.path.basename(speaker_wav.name or ""))[1]
        with tempfile.NamedTemporaryFile(
            mode="wb", prefix="speaker_", suffix=extension, delete=False
        ) as destination:
            for chunk in speaker_wav.chunks():
                destination.write(chunk)
            return destination.name

    def _stream_and_delete(self, file_name):
        """Yield the file's bytes in fixed-size chunks, then delete the file.

        The ``finally`` clause also runs when the response is closed early
        (client disconnect), so a started stream always removes its file.
        """
        try:
            with open(file_name, 'rb') as audio:
                yield from iter(lambda: audio.read(self.stream_chunk_size), b"")
        finally:
            self._remove_file(file_name)

    @swagger_auto_schema(
        operation_id='Create text-to-speech audio',
        operation_description=(
            'Synthesize speech for the submitted text in the voice of the '
            'uploaded speaker sample and return the result as a WAV download.'
        ),
        responses={
            status.HTTP_200_OK: openapi.Response(
                'Generated WAV audio',
                schema=openapi.Schema(type=openapi.TYPE_FILE),
            ),
            status.HTTP_400_BAD_REQUEST: 'Validation errors',
        },
    )
    def create(self, request, *args, **kwargs):
        """Generate speech for the posted text and return it as a WAV download.

        Args:
            request: The incoming DRF request carrying the multipart form data.
            *args: Unused positional arguments passed by the framework.
            **kwargs: Unused keyword arguments passed by the framework.

        Returns:
            A ``FileResponse`` streaming the generated ``audio/wav`` file as an
            attachment, or a 400 ``Response`` with the serializer errors when
            validation fails.

        Raises:
            Exception: Any error raised by the TTS model or the database insert
                propagates to the framework after temporary files are removed.
        """
        serializer = self.get_serializer(data=request.data)
        if not serializer.is_valid():
            return Response(serializer.errors, status=status.HTTP_400_BAD_REQUEST)

        gpu_available = torch.cuda.is_available()
        text = serializer.validated_data.get("text")
        speaker_wav = serializer.validated_data.get("speaker_wav")
        language = serializer.validated_data.get("language")
        output_filename = f"output_{uuid.uuid4()}.wav"

        start_time = time.time()
        speaker_file_path = self._save_speaker_sample(speaker_wav)
        try:
            # NOTE(review): the model is instantiated on every request, as
            # before; consider caching it if load time dominates.
            tts = TTS(self.tts_model_name, gpu=gpu_available)
            tts.tts_to_file(
                text=text,
                file_path=output_filename,
                speaker_wav=speaker_file_path,
                language=language,
            )
        except Exception:
            # Do not leave a partially written output file behind.
            self._remove_file(output_filename)
            raise
        finally:
            # The temp copy is only needed by the model; the upload object
            # itself is what gets stored on the database record below.
            self._remove_file(speaker_file_path)
        end_time = time.time()
        processing_time = end_time - start_time

        try:
            TextToSpeech.objects.create(
                text=text,
                speaker_wav=speaker_wav,
                output_wav=output_filename,
                language=language,
                created_by=request.user,  # Assign the authenticated user here
            )
        except Exception:
            # The audio will never be streamed, so remove it before re-raising.
            self._remove_file(output_filename)
            raise

        # The generator has no ``name`` attribute, so pass the download
        # filename explicitly for the Content-Disposition header.
        response = FileResponse(
            self._stream_and_delete(output_filename),
            as_attachment=True,
            filename=output_filename,
            content_type='audio/wav',
        )

        logger.info(
            "start time: %s , end time: %s and Processing time: %s seconds",
            start_time,
            end_time,
            processing_time,
        )

        return response