siddhartharya committed on
Commit af743db · verified · 1 Parent(s): cd8b9f6

Update utils.py

Files changed (1)
  1. utils.py +18 -79
utils.py CHANGED
@@ -1,79 +1,18 @@
- from groq import Groq
- from pydantic import BaseModel, ValidationError
- from typing import List, Literal
- import os
- import tiktoken
- import tempfile
- import json
- import re
- from transformers import pipeline
- import torch
- import soundfile as sf
-
- groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
- tokenizer = tiktoken.get_encoding("cl100k_base")
-
- # Initialize TTS pipelines
- tts_male = pipeline("text-to-speech", model="microsoft/speecht5_tts", device="cpu")
- tts_female = pipeline("text-to-speech", model="microsoft/speecht5_tts", device="cpu")
-
- # Load speaker embeddings
- male_embedding = torch.load("https://huggingface.co/microsoft/speecht5_tts/resolve/main/en_speaker_1.pt")
- female_embedding = torch.load("https://huggingface.co/microsoft/speecht5_tts/resolve/main/en_speaker_9.pt")
-
- class DialogueItem(BaseModel):
-     speaker: Literal["John", "Sarah"]  # Changed from "Host" and "Guest" to "John" and "Sarah"
-     text: str
-
- class Dialogue(BaseModel):
-     dialogue: List[DialogueItem]
-
- def truncate_text(text, max_tokens=2048):
-     tokens = tokenizer.encode(text)
-     if len(tokens) > max_tokens:
-         return tokenizer.decode(tokens[:max_tokens])
-     return text
-
- def generate_script(system_prompt: str, input_text: str, tone: str):
-     input_text = truncate_text(input_text)
-     prompt = f"{system_prompt}\nTONE: {tone}\nINPUT TEXT: {input_text}"
-
-     response = groq_client.chat.completions.create(
-         messages=[
-             {"role": "system", "content": prompt},
-         ],
-         model="llama-3.1-70b-versatile",
-         max_tokens=2048,
-         temperature=0.7
-     )
-
-     content = response.choices[0].message.content
-     content = re.sub(r'```json\s*|\s*```', '', content)
-
-     try:
-         json_data = json.loads(content)
-         dialogue = Dialogue.model_validate(json_data)
-     except json.JSONDecodeError as json_error:
-         match = re.search(r'\{.*\}', content, re.DOTALL)
-         if match:
-             try:
-                 json_data = json.loads(match.group())
-                 dialogue = Dialogue.model_validate(json_data)
-             except (json.JSONDecodeError, ValidationError) as e:
-                 raise ValueError(f"Failed to parse dialogue JSON: {e}\nContent: {content}")
-         else:
-             raise ValueError(f"Failed to find valid JSON in the response: {content}")
-     except ValidationError as e:
-         raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {content}")
-
-     return dialogue
-
- def generate_audio(text: str, speaker: str) -> str:
-     if speaker == "John":
-         speech = tts_male(text, speaker_embeddings=male_embedding)
-     else:  # Sarah
-         speech = tts_female(text, speaker_embeddings=female_embedding)
-
-     with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
-         sf.write(temp_audio.name, speech["audio"], speech["sampling_rate"])
-     return temp_audio.name
 
+ runtime error
+ Exit code: 1. Reason: Traceback (most recent call last):
+   File "/home/user/app/app.py", line 2, in <module>
+     from utils import generate_script, generate_audio, truncate_text
+   File "/home/user/app/utils.py", line 17, in <module>
+     tts_male = pipeline("text-to-speech", model="microsoft/speecht5_tts", device="cpu")
+   File "/usr/local/lib/python3.10/site-packages/transformers/pipelines/__init__.py", line 999, in pipeline
+     tokenizer = AutoTokenizer.from_pretrained(
+   File "/usr/local/lib/python3.10/site-packages/transformers/models/auto/tokenization_auto.py", line 907, in from_pretrained
+     return tokenizer_class.from_pretrained(pretrained_model_name_or_path, *inputs, **kwargs)
+   File "/usr/local/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1637, in __getattribute__
+     requires_backends(cls, cls._backends)
+   File "/usr/local/lib/python3.10/site-packages/transformers/utils/import_utils.py", line 1625, in requires_backends
+     raise ImportError("".join(failed))
+ ImportError:
+ SpeechT5Tokenizer requires the SentencePiece library but it was not found in your environment. Checkout the instructions on the
+ installation page of its repo: https://github.com/google/sentencepiece#installation and follow the ones
+ that match your environment. Please note that you may need to restart your runtime after installation.
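
The traceback points at a missing dependency rather than a bug in the module's logic: SpeechT5's tokenizer requires the SentencePiece library, which was not installed in the Space. The usual fix is to add `sentencepiece` (and `datasets`, if speaker embeddings are loaded as sketched below) to the Space's `requirements.txt` and restart the runtime, as the error message suggests. There is also a latent problem in the removed initialization: `torch.load()` cannot read from an `https://` URL, and the `en_speaker_*.pt` files do not appear to be a documented part of the `microsoft/speecht5_tts` repo. The model card instead loads speaker x-vectors from the `Matthijs/cmu-arctic-xvectors` dataset. A minimal sketch, assuming those two packages are installed; the speaker indices are illustrative assumptions:

```python
# A minimal sketch, assuming `sentencepiece` and `datasets` are listed in
# requirements.txt. The dataset and the female index follow the model card's
# example; the male index is an assumption to verify by listening.
import torch
from datasets import load_dataset
from transformers import pipeline

# One pipeline is enough; the voice is chosen per call via speaker embeddings.
tts = pipeline("text-to-speech", model="microsoft/speecht5_tts", device="cpu")

xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
female_embedding = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)  # index from the model card
male_embedding = torch.tensor(xvectors[0]["xvector"]).unsqueeze(0)       # illustrative index
```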
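
For completeness, a sketch of how `generate_audio` could call the pipeline under that setup. The model card's example passes the embedding through `forward_params` rather than as a direct keyword argument; everything else mirrors the removed function.

```python
import tempfile
import soundfile as sf

def generate_audio(text: str, speaker: str) -> str:
    # Select the per-speaker x-vector; "John" is the male voice, "Sarah" the female one.
    embedding = male_embedding if speaker == "John" else female_embedding
    speech = tts(text, forward_params={"speaker_embeddings": embedding})
    # The pipeline returns a dict with "audio" (a numpy array) and "sampling_rate".
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        sf.write(temp_audio.name, speech["audio"], speech["sampling_rate"])
    return temp_audio.name
```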