rajaramesh committed on
Commit
6dcca02
·
1 Parent(s): c7b7071

Add application file

Browse files
Files changed (2) hide show
  1. app.py +104 -0
  2. requirements.txt +4 -0
app.py ADDED
@@ -0,0 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+
3
+ import requests
4
+ from langchain_community.document_loaders import UnstructuredURLLoader
5
+ from youtube_transcript_api import YouTubeTranscriptApi
6
+
7
+ import subprocess
8
+
9
+
10
def _youtube_video_id(url):
    """Return the video id encoded in a YouTube URL, or None if there is none.

    Handles ``youtube.com/watch?v=<id>`` (with any extra query parameters),
    ``youtu.be/<id>`` short links, and ``/shorts/<id>``, ``/embed/<id>`` and
    ``/live/<id>`` paths on youtube.com hosts.
    """
    # Function-scope import so this block does not depend on the file's
    # top-level import list.
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(url)
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    if host == "youtu.be":
        return path_parts[0] if path_parts else None

    if host == "youtube.com" or host.endswith(".youtube.com"):
        # parse_qs isolates the "v" value, so trailing parameters such as
        # "&t=10s" or "&list=..." no longer leak into the video id.
        ids = parse_qs(parsed.query).get("v")
        if ids:
            return ids[0]
        if len(path_parts) >= 2 and path_parts[0] in ("shorts", "embed", "live"):
            return path_parts[1]

    return None


def text_extract(generic_url: str) -> str:
    """
    Extract the text from any website or youtube video.

    Args:
        generic_url (str): the url of the website or youtube video to extract
            text from

    Returns:
        str: The extracted text. For a YouTube video this is the transcript
            entries joined with spaces; for any other page it is the first
            loaded document's content with blank lines removed and each line
            stripped. If the input is empty or an error occurs, a
            human-readable message is returned instead of raising.
    """
    # Treat None, empty and whitespace-only input the same way.
    if not generic_url or not generic_url.strip():
        print("Please provide the information to get started")
        return "Please provide the information to get started"

    url = generic_url.strip()
    final_text = ""

    try:
        # Reachability probe: a connection failure or malformed URL raises
        # here and is reported by the handlers below. A non-200 status is
        # only logged; extraction is still attempted.
        response = requests.get(url, timeout=5)
        if response.status_code == 200:
            print("URL is valid and reachable.")
        else:
            print("Unable to reach")

        video_id = _youtube_video_id(url)
        if video_id:
            # NOTE(review): newer youtube-transcript-api releases may expose
            # this through a different entry point -- confirm against the
            # installed version.
            transcript = YouTubeTranscriptApi.get_transcript(video_id=video_id)
            final_text = " ".join(entry["text"] for entry in transcript)
        else:
            # NOTE(review): ssl_verify=False presumably disables TLS
            # certificate checks in the loader -- confirm this is intended.
            loader = UnstructuredURLLoader(
                urls=[url],
                ssl_verify=False,
                headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_5_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36"},
            )
            docs = loader.load()
            text = docs[0].page_content
            # Drop blank lines and surrounding whitespace on each line.
            cleaned_lines = [line.strip() for line in text.split("\n") if line.strip()]
            final_text = "\n".join(cleaned_lines)

    except requests.exceptions.ConnectionError as e:
        print("Error reaching the URL:", e)
        return "Pls enter valid url we have encounterd ConnectionError\n" + str(e)
    except requests.exceptions.RequestException as e:
        print("Error reaching the URL:", e)
        return "Pls enter valid url we have encounterd RequestException\n" + str(e)
    except Exception as e:
        # Boundary handler: this function backs a UI/tool endpoint, so any
        # failure is returned as text rather than propagated.
        print(f"Exception:{e}")
        return "We have encounterd the following error\n" + str(e)

    return final_text
60
+
61
def terminal(command: str) -> str:
    """Return a fixed notice stating that this terminal tool is a dummy.

    No command is executed. ``command`` is accepted so the function keeps
    the signature of a terminal tool, but its value is never read.

    Args:
        command: The command text supplied by the caller (ignored)

    Returns:
        A constant Markdown-style message (one heading line followed by two
        bullet lines) explaining that the terminal is a placeholder.
    """
    notice_parts = (
        "# Hey you are accessing a dummy terminal. \n",
        "- Its very dangerous to exposing a terminal as a tool to public. \n",
        "- If you want this terminal tool working in action, then checkout my youtube video: ",
    )
    return "".join(notice_parts)
75
+
76
+
77
+
78
# Build one Gradio interface per tool, then expose both as tabs.

# Tab 1: URL in, extracted text out.
_url_box = gr.Textbox(placeholder="Paste any website or youtube video url")
_extracted_box = gr.Textbox(placeholder="Text extracted from website or youtube video")
text_extract_fn = gr.Interface(
    fn=text_extract,
    inputs=_url_box,
    outputs=_extracted_box,
    title="Text Extractor",
    description="Extract the text from any website or youtube video.",
)

# Tab 2: command text in, Markdown out; the flag button is turned off.
_command_box = gr.Textbox(placeholder="Enter you command")
terminal_fn = gr.Interface(
    fn=terminal,
    inputs=_command_box,
    outputs="markdown",
    flagging_mode="never",
    title="Shell Server",
    description="Runs the shell commands on your computer.",
)

# Tab labels are matched to the interfaces by position.
_tab_interfaces = [text_extract_fn, terminal_fn]
_tab_labels = ["Text Extractor", "Command Terminal"]
demo = gr.TabbedInterface(_tab_interfaces, _tab_labels)

if __name__ == "__main__":
    # Launch the app with the mcp_server option enabled.
    demo.launch(mcp_server=True)
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ gradio[mcp]
2
+ langchain-community
3
+ youtube-transcript-api
4
+ unstructured