siddhartharya committed on
Commit
b5389b5
·
verified ·
1 Parent(s): 2a109d4

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +21 -1
utils.py CHANGED
@@ -7,6 +7,8 @@ import json
7
  import re
8
  from gtts import gTTS
9
  import tempfile
 
 
10
 
11
  groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
12
  tokenizer = tiktoken.get_encoding("cl100k_base")
@@ -24,9 +26,27 @@ def truncate_text(text, max_tokens=2048):
24
  return tokenizer.decode(tokens[:max_tokens])
25
  return text
26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
28
  input_text = truncate_text(input_text)
29
- word_limit = 300 if target_length == "Short (1-2 min)" else 750 # Assuming 150 words per minute
30
 
31
  prompt = f"""
32
  {system_prompt}
 
7
  import re
8
  from gtts import gTTS
9
  import tempfile
10
+ import requests
11
+ from bs4 import BeautifulSoup
12
 
13
  groq_client = Groq(api_key=os.environ["GROQ_API_KEY"])
14
  tokenizer = tiktoken.get_encoding("cl100k_base")
 
26
  return tokenizer.decode(tokens[:max_tokens])
27
  return text
28
 
29
def extract_text_from_url(url, timeout=10):
    """Fetch a web page and return its visible text, one chunk per line.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The page text with ``<script>`` and ``<style>`` content removed,
        surrounding whitespace stripped and empty chunks dropped; chunks
        are joined with newlines.

    Raises:
        ValueError: If the download, the HTTP status check or the parsing
            fails. The underlying exception is attached as ``__cause__``.
    """
    try:
        # Without a timeout, requests waits indefinitely on a stalled server.
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        # Script and style bodies are not readable content; drop them
        # before extracting text.
        for tag in soup(["script", "style"]):
            tag.decompose()

        text = soup.get_text()
        lines = (line.strip() for line in text.splitlines())
        # Split on double spaces so side-by-side headlines become separate
        # chunks while ordinary sentences stay on one line.
        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
        return '\n'.join(chunk for chunk in chunks if chunk)
    except Exception as e:
        # Callers handle ValueError; chain the cause to keep the traceback.
        raise ValueError(f"Error extracting text from URL: {str(e)}") from e
46
+
47
  def generate_script(system_prompt: str, input_text: str, tone: str, target_length: str):
48
  input_text = truncate_text(input_text)
49
+ word_limit = 300 if target_length == "Short (1-2 min)" else 750
50
 
51
  prompt = f"""
52
  {system_prompt}