Spaces:

shukdevdatta123
/

Video-Transcriber

Running

App Files Files Community

shukdevdatta123 committed on Feb 8

Commit

2dbd25a

verified ·

1 Parent(s): 60c6961

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -32

app.py CHANGED Viewed

@@ -55,40 +55,48 @@ def transcribe_audio(audio_file):
     except sr.RequestError:
         return "Could not request results from Google Speech Recognition service."
-# Function to get HTML content for extracting video URL
 def gethtml(url):
     headers = {
         "cache-Control": "no-cache",
         "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
         "accept-encoding": "gzip, deflate, br",
-        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8",
         "content-type": "application/x-www-form-urlencoded",
-        "cookie": "lang=en; country=CN; uid=fd94a82a406a8dd4; sfHelperDist=72; reference=14;",
-        "origin": "https://en.savefrom.net",
-        "referer": "https://en.savefrom.net/1-youtube-video-downloader-4/",
         "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
     }
-    kv = {"sf_url": url, "sf_submit": "", "new": "1", "lang": "en", "app": "", "country": "cn", "os": "Windows", "browser": "Chrome"}
     r = requests.post(url="https://en.savefrom.net/savefrom.php", headers=headers, data=kv)
     r.raise_for_status()
     return r.text
-# Function to extract the video download URL with better error handling
 def extract_video_url(youtube_url):
     try:
-        # Get HTML content for the provided URL
         reo = gethtml(youtube_url)
-        # Extract the script containing the video download info
-        reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
         reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
         reA = reo.split("\n")
-        # Ensure that the necessary script part is found
         if len(reA) < 3:
             raise ValueError("Could not extract valid script data from the YouTube page.")
-        # Extract the video URL
         name = reA[len(reA) - 3].split(";")[0] + ";"
         addition = """
         const jsdom = require("jsdom");
@@ -101,24 +109,25 @@ def extract_video_url(youtube_url):
         ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
         text = ct.eval(name.split("=")[1].replace(";", ""))
-        # Validate the extraction of the JSON data
-        result = re.search('show\((.*?)\);;', text, re.I | re.M)
-        if not result:
-            raise ValueError("No video download URL found in the script data.")
-        result = result.group(0).replace("show(", "").replace(");;", "")
-        # Parse the result as JSON
-        j = json.loads(result)
-        # Ensure that the URL data exists
-        if "url" not in j or len(j["url"]) <= 1:
-            raise ValueError("No valid download links found for this video.")
-        # Return the download URL for the video
-        num = 1  # Get the second URL from the available options
-        downurl = j["url"][num]["url"]
-        return downurl
     except Exception as e:
         raise ValueError(f"Error occurred while extracting the download URL: {e}")
@@ -247,10 +256,13 @@ elif tab == "Audio":
                 mime="audio/wav"
             )
-# Streamlit UI for YouTube download
 elif tab == "YouTube":
     youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")
     if st.button("Get Download Link"):
         if youtube_url:
             try:

     except sr.RequestError:
         return "Could not request results from Google Speech Recognition service."
+# Function to get the HTML of the page
 def gethtml(url):
     headers = {
         "cache-Control": "no-cache",
         "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
         "accept-encoding": "gzip, deflate, br",
+        "accept-language": "en-US,en;q=0.9",
         "content-type": "application/x-www-form-urlencoded",
         "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36"
     }
+    kv = {"sf_url": url,
+          "sf_submit": "",
+          "new": "1",
+          "lang": "en",
+          "app": "",
+          "country": "us",
+          "os": "Windows",
+          "browser": "Chrome"}
     r = requests.post(url="https://en.savefrom.net/savefrom.php", headers=headers, data=kv)
     r.raise_for_status()
     return r.text
+# Function to extract the video download URL
 def extract_video_url(youtube_url):
     try:
+        # Get the HTML content of the YouTube page
         reo = gethtml(youtube_url)
+        # Try extracting the relevant script tag containing download information
+        try:
+            reo = reo.split("<script type=\"text/javascript\">")[1].split("</script>")[0]
+        except IndexError:
+            raise ValueError("Could not find the script containing video data in the HTML response.")
+        # Modify the script to allow extraction
         reo = reo.replace("(function(){", "(function(){\nthis.alert=function(){};")
         reA = reo.split("\n")
         if len(reA) < 3:
             raise ValueError("Could not extract valid script data from the YouTube page.")
+        # Extract the JSON object containing the video download URLs
         name = reA[len(reA) - 3].split(";")[0] + ";"
         addition = """
         const jsdom = require("jsdom");
         ct = execjs.compile(addition + reo, cwd=r'C:\Users\19308\AppData\Roaming\npm\node_modules')
         text = ct.eval(name.split("=")[1].replace(";", ""))
+        # Extract and parse the JSON
+        try:
+            result = re.search('show\((.*?)\);;', text, re.I | re.M)
+            if result is None:
+                raise ValueError("No valid video download URL found in the extracted data.")
+            result = result.group(0).replace("show(", "").replace(");;", "")
+            j = json.loads(result)
+            # Ensure the JSON contains the expected download URLs
+            if "url" not in j or len(j["url"]) == 0:
+                raise ValueError("No valid download links found in the extracted data.")
+            # Assuming the first video URL is what we want (or try a different index if necessary)
+            downurl = j["url"][0]["url"]
+            return downurl
+        except (IndexError, KeyError, json.JSONDecodeError) as e:
+            raise ValueError(f"Error occurred while extracting the download URL: {e}")
     except Exception as e:
         raise ValueError(f"Error occurred while extracting the download URL: {e}")
                 mime="audio/wav"
             )
+# Streamlit UI for YouTube video download
 elif tab == "YouTube":
+    st.title("YouTube Video Downloader")
+    st.write("""This app allows you to download YouTube videos in various formats. Simply enter the YouTube URL below and click "Get Download Link".""")
     youtube_url = st.text_input("Enter YouTube Video URL", "https://www.youtube.com/watch?v=YPvtz1lHRiw")
     if st.button("Get Download Link"):
         if youtube_url:
             try: