siddhartharya
committed on
Update utils.py
utils.py CHANGED
@@ -7,6 +7,8 @@ import json
 import re
 from gtts import gTTS
 import tempfile
+import requests
+from bs4 import BeautifulSoup
 
 groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
 tokenizer = tiktoken.get_encoding("cl100k_base")
@@ -24,9 +26,27 @@ def truncate_text(text, max_tokens=2048):
         return tokenizer.decode(tokens[:max_tokens])
     return text
 
+def extract_text_from_url(url):
+    try:
+        response = requests.get(url)
+        response.raise_for_status()
+        soup = BeautifulSoup(response.text, 'html.parser')
+
+        for script in soup(["script", "style"]):
+            script.decompose()
+
+        text = soup.get_text()
+        lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
+        text = '\n'.join(chunk for chunk in chunks if chunk)
+
+        return text
+    except Exception as e:
+        raise ValueError(f"Error extracting text from URL: {str(e)}")
+
 def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
     input_text = truncate_text(input_text)
-    word_limit = 300 if target_length == "Short (1-2 min)" else 750
+    word_limit = 300 if target_length == "Short (1-2 min)" else 750
 
     prompt = f"""
     {system_prompt}