File size: 3,918 Bytes
1df1eb4
 
 
 
 
 
 
 
b5389b5
 
fb1499c
 
1df1eb4
 
 
ef260c8
1df1eb4
 
bf3ae24
1df1eb4
 
 
 
 
 
 
 
 
 
 
b5389b5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e81cad8
1df1eb4
b5389b5
e81cad8
 
 
 
 
 
 
 
 
 
bf3ae24
e81cad8
 
 
 
 
1df1eb4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45aea5e
bf3ae24
 
ef260c8
 
 
 
 
fb1499c
 
 
 
 
ef260c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
from groq import Groq
from pydantic import BaseModel, ValidationError
from typing import List, Literal
import os
import tiktoken
import json
import re
import tempfile
import requests
from bs4 import BeautifulSoup
from pydub import AudioSegment
import io

# Shared Groq API client. os.environ[...] raises KeyError at import time if
# GROQ_API_KEY is not set.
groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
# Tokenizer used by truncate_text to count and cut tokens.
tokenizer = tiktoken.get_encoding("cl100k_base")
# API key sent to the VoiceRSS endpoint by generate_audio; like the Groq key,
# it must be present in the environment when this module is imported.
VOICERSS_API_KEY = os.environ["VOICERSS_API_KEY"]

class DialogueItem(BaseModel):
    # One spoken line of the generated script: who says it and what is said.
    # speaker is restricted to the two names "John" and "Lily"; any other
    # value fails pydantic validation.
    speaker: Literal["John", "Lily"]
    text: str

class Dialogue(BaseModel):
    # Top-level shape validated from the model's JSON reply: an object with a
    # "dialogue" key holding the list of DialogueItem entries.
    dialogue: List[DialogueItem]

def truncate_text(text, max_tokens=2048):
    """Return ``text`` limited to at most ``max_tokens`` tokens.

    The text is encoded with the module-level ``tokenizer``. If it already
    fits within the limit the original string is returned untouched;
    otherwise only the first ``max_tokens`` tokens are decoded back into a
    string and returned.
    """
    token_ids = tokenizer.encode(text)
    if len(token_ids) <= max_tokens:
        return text
    return tokenizer.decode(token_ids[:max_tokens])

def extract_text_from_url(url, timeout=30):
    """Download a web page and return its visible text, one chunk per line.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on an unresponsive
            host.

    Returns:
        The page text with ``<script>`` and ``<style>`` content removed,
        surrounding whitespace stripped, and empty chunks dropped; chunks
        are joined with newlines.

    Raises:
        ValueError: If the download, HTTP status check, or parsing fails.
            The original exception is attached as ``__cause__``.
    """
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Script and style bodies are not readable content; remove them
        # before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()

        raw_text = soup.get_text()
        lines = (line.strip() for line in raw_text.splitlines())
        # Break each line on double spaces so that separate phrases laid out
        # on one line end up on their own output lines.
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        return '\n'.join(chunk for chunk in chunks if chunk)
    except Exception as e:
        # Deliberately broad: every failure is surfaced to callers as a
        # single ValueError, with the root cause chained for debugging.
        raise ValueError(f"Error extracting text from URL: {str(e)}") from e

def _parse_dialogue(content: str) -> Dialogue:
    """Convert the raw model reply into a validated ``Dialogue``.

    The reply is first stripped of Markdown code fences and parsed as JSON.
    If that fails, the widest ``{...}`` span in the reply is tried instead,
    which copes with replies that wrap the JSON in extra prose.

    Raises:
        ValueError: If no JSON object can be found, the JSON cannot be
            parsed, or it does not match the ``Dialogue`` schema.
    """
    content = re.sub(r'```json\s*|\s*```', '', content)

    try:
        json_data = json.loads(content)
    except json.JSONDecodeError as json_error:
        # Fallback: pull the outermost brace-delimited span out of the reply.
        match = re.search(r'\{.*\}', content, re.DOTALL)
        if not match:
            raise ValueError(
                f"Failed to find valid JSON in the response: {content}"
            ) from json_error
        try:
            return Dialogue.model_validate(json.loads(match.group()))
        except (json.JSONDecodeError, ValidationError) as e:
            raise ValueError(f"Failed to parse dialogue JSON: {e}\nContent: {content}") from e

    try:
        return Dialogue.model_validate(json_data)
    except ValidationError as e:
        raise ValueError(f"Failed to validate dialogue structure: {e}\nContent: {content}") from e


def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
    """Ask the Groq chat model for a two-speaker podcast script.

    Args:
        system_prompt: Base instructions placed at the top of the prompt.
        input_text: Source material; truncated with ``truncate_text`` before
            being embedded in the prompt.
        tone: Desired tone, inserted verbatim into the prompt.
        target_length: Length label. ``"Short (1-2 min)"`` selects a
            300-word limit; any other value selects 750 words.

    Returns:
        A ``Dialogue`` parsed and validated from the model's JSON reply.

    Raises:
        ValueError: If the model returns no content or the reply cannot be
            parsed into a valid ``Dialogue``.
    """
    input_text = truncate_text(input_text)
    word_limit = 300 if target_length == "Short (1-2 min)" else 750

    prompt = f"""
    {system_prompt}
    TONE: {tone}
    TARGET LENGTH: {target_length} (approximately {word_limit} words)
    INPUT TEXT: {input_text}

    Generate a complete, well-structured podcast script that:
    1. Starts with a proper introduction
    2. Covers the main points from the input text
    3. Has a natural flow of conversation between John and Lily
    4. Concludes with a summary and sign-off
    5. Fits within the {word_limit} word limit for the target length of {target_length}

    Ensure the script is not abruptly cut off and forms a complete conversation.
    """

    response = groq_client.chat.completions.create(
        messages=[
            {"role": "system", "content": prompt},
        ],
        model="llama-3.1-70b-versatile",
        max_tokens=2048,
        temperature=0.7
    )

    # An empty choices list or a None message body would otherwise surface as
    # IndexError/TypeError; report it as the same ValueError used for every
    # other unusable reply.
    choices = response.choices
    content = choices[0].message.content if choices else None
    if content is None:
        raise ValueError(
            "Failed to find valid JSON in the response: model returned no content"
        )

    return _parse_dialogue(content)

def generate_audio(text: str, speaker: str) -> str:
    """Synthesize ``text`` with VoiceRSS and return the path to a WAV file.

    Args:
        text: The sentence(s) to speak.
        speaker: ``"John"`` selects the "John" voice; any other value
            selects "Lily".

    Returns:
        Path of a temporary ``.wav`` file containing the audio. The file is
        not deleted automatically; the caller owns it.

    Raises:
        Exception: If the service responds with a non-200 status or an
            error body.
    """
    voice = "John" if speaker == "John" else "Lily"

    # Pass the query as params so requests percent-encodes it; interpolating
    # raw text into the URL breaks on characters such as '&', '#' or '+'.
    # HTTPS keeps the API key out of cleartext traffic.
    params = {
        "key": VOICERSS_API_KEY,
        "hl": "en-us",
        "v": voice,
        "src": text,
    }
    response = requests.get("https://api.voicerss.org/", params=params, timeout=60)

    # NOTE(review): VoiceRSS appears to report failures as a plain-text body
    # starting with "ERROR" even with HTTP 200 - confirm against the API docs.
    if response.status_code != 200 or response.content.lstrip().startswith(b"ERROR"):
        raise Exception(f"Error generating audio: {response.text}")

    # Convert the audio data to a format Gradio can handle
    audio = AudioSegment.from_mp3(io.BytesIO(response.content))

    # Reserve a unique path, then close the handle before exporting: writing
    # to a file that is still open by name fails on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_audio:
        wav_path = temp_audio.name
    try:
        audio.export(wav_path, format="wav")
    except Exception:
        # Do not leave an empty temp file behind when the export fails.
        os.unlink(wav_path)
        raise
    return wav_path