muhammadsalmanalfaridzi committed on
Commit
943f6f4
·
verified ·
1 Parent(s): 8151600

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +242 -0
app.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import os
3
+ import tempfile
4
+ import gc
5
+ import base64
6
+ import time
7
+ import yaml
8
+
9
+ from tqdm import tqdm
10
+ from crawl4ai_scrapper import * # Import Crawl4AI Scraper
11
+ from dotenv import load_dotenv
12
+ load_dotenv()
13
+
14
+ from crewai import Agent, Crew, Process, Task, LLM
15
+ from crewai_tools import FileReadTool
16
+
17
+ docs_tool = FileReadTool()
18
+
19
+ # Using the Cerebras Llama 3.3 70B model
20
def load_llm():
    """Build the CrewAI LLM client for the Cerebras-hosted Llama 3.3 70B model.

    The API key is read from the ``CEREBRAS_API_KEY`` environment variable
    (``load_dotenv()`` runs at import time, so a local ``.env`` file works).
    A missing key is passed through as ``None``.

    Returns:
        LLM: a configured ``crewai.LLM`` instance.
    """
    # CrewAI routes completions through LiteLLM, which picks the backend from
    # the "<provider>/" prefix of the model name. The bare "llama3.3-70B" id
    # carries no provider; "llama-3.3-70b" is the model id Cerebras publishes.
    return LLM(
        model="cerebras/llama-3.3-70b",
        api_key=os.getenv("CEREBRAS_API_KEY"),
    )
24
+
25
+ # ===========================
26
+ # Define Agents & Tasks
27
+ # ===========================
28
def create_agents_and_tasks():
    """Assemble the two-agent Crew described by ``config.yaml``.

    The first entry under ``agents`` becomes the analysis agent (equipped
    with the module-level file-read tool) and owns the first entry under
    ``tasks``; the second agent entry becomes the response synthesizer and
    owns the second task. Each agent receives its own ``load_llm()`` client.

    Returns:
        Crew: a verbose crew that runs both tasks sequentially.
    """
    with open("config.yaml", 'r') as config_file:
        settings = yaml.safe_load(config_file)

    # Agent entries are looked up lazily, in the same order as they are used.
    agent_specs = settings["agents"]
    analyst = Agent(
        role=agent_specs[0]["role"],
        goal=agent_specs[0]["goal"],
        backstory=agent_specs[0]["backstory"],
        verbose=True,
        tools=[docs_tool],
        llm=load_llm(),
    )
    synthesizer = Agent(
        role=agent_specs[1]["role"],
        goal=agent_specs[1]["goal"],
        backstory=agent_specs[1]["backstory"],
        verbose=True,
        llm=load_llm(),
    )

    task_specs = settings["tasks"]
    analyze = Task(
        description=task_specs[0]["description"],
        expected_output=task_specs[0]["expected_output"],
        agent=analyst,
    )
    respond = Task(
        description=task_specs[1]["description"],
        expected_output=task_specs[1]["expected_output"],
        agent=synthesizer,
    )

    return Crew(
        agents=[analyst, synthesizer],
        tasks=[analyze, respond],
        process=Process.sequential,
        verbose=True,
    )
69
+
70
+ # ===========================
71
+ # Streamlit Setup
72
+ # ===========================
73
def _image_as_base64(path):
    """Return the bytes of the file at ``path`` encoded as a base64 string."""
    # The context manager closes the handle deterministically; a bare
    # open(...).read() leaves the file open until garbage collection.
    with open(path, "rb") as image_file:
        return base64.b64encode(image_file.read()).decode()


# Page title with the two logos inlined as data URIs, so no static file
# serving is needed for the images.
st.markdown("""
# YouTube Trend Analysis powered by <img src="data:image/png;base64,{}" width="120" style="vertical-align: -3px;"> & <img src="data:image/png;base64,{}" width="120" style="vertical-align: -3px;">
""".format(_image_as_base64("assets/crewai.png"), _image_as_base64("assets/crawl4ai.png")), unsafe_allow_html=True)
76
+
77
# Seed the per-session state on the first run only; later reruns keep
# whatever values are already stored.
for state_key, initial_value in (
    ("messages", []),    # Chat history
    ("response", None),
    ("crew", None),      # Store the Crew object
):
    if state_key not in st.session_state:
        st.session_state[state_key] = initial_value
85
+
86
def reset_chat():
    """Empty the stored chat history and run a garbage-collection pass."""
    st.session_state["messages"] = []
    gc.collect()
89
+
90
def _write_transcript(path, segments):
    """Write transcript segments to ``path`` as "(start-end): text" lines."""
    with open(path, "w") as transcript_file:
        for segment in segments:
            text = segment['text']
            start_time = segment['start_time']
            end_time = segment['end_time']
            transcript_file.write(f"({start_time:.2f}-{end_time:.2f}): {text}\n")


def start_analysis():
    """Scrape the configured channels, save transcripts, and run the crew.

    Reads ``youtube_channels``, ``start_date`` and ``end_date`` from
    ``st.session_state``. Triggers a scrape, polls its progress every 10
    seconds until it reports "ready", renders the scraped videos three per
    row, and writes one transcript file per video under ``transcripts/``.
    It then builds the crew and stores the crew, its kickoff result, the
    list of transcript paths and the raw scrape output back into
    ``st.session_state`` (``crew``, ``response``, ``all_files``,
    ``channel_scrapped_output``).

    Returns early, after showing an error, if the scrape reports "failed".
    """
    with st.spinner('Scraping videos... This may take a moment.'):

        status_container = st.empty()
        status_container.info("Extracting videos from the channels...")

        # Trigger Crawl4AI scraping instead of BrightData
        channel_snapshot_id = trigger_scraping_channels(
            st.session_state.youtube_channels,
            10,
            st.session_state.start_date,
            st.session_state.end_date,
            "Latest",
            "",
        )
        snapshot_id = channel_snapshot_id['snapshot_id']
        status = get_progress(snapshot_id)

        while status['status'] != "ready":
            # Check for failure before sleeping so a scrape that has already
            # failed is reported at once instead of after another poll cycle.
            if status['status'] == "failed":
                status_container.error(f"Scraping failed: {status}")
                return

            status_container.info(f"Current status: {status['status']}")
            time.sleep(10)
            status = get_progress(snapshot_id)

        # The loop only exits normally once the status is "ready".
        status_container.success("Scraping completed successfully!")

        # Show a list of YouTube videos here in a scrollable container
        channel_scrapped_output = get_output(status['snapshot_id'], format="json")
        videos = channel_scrapped_output[0]

        st.markdown("## YouTube Videos Extracted")

        # Number of videos per row (adjust as needed)
        videos_per_row = 3

        with st.container():
            for row_start in range(0, len(videos), videos_per_row):
                # Fresh set of columns for each row; a short final row simply
                # leaves its trailing columns empty.
                cols = st.columns(videos_per_row)
                row_videos = videos[row_start:row_start + videos_per_row]
                for col, video in zip(cols, row_videos):
                    with col:
                        st.video(video['url'])

        status_container.info("Processing transcripts...")
        # The output directory is not guaranteed to exist on a fresh checkout.
        os.makedirs("transcripts", exist_ok=True)
        st.session_state.all_files = []

        for video in tqdm(videos):
            # Save transcript to a file named after the video's shortcode
            file = "transcripts/" + video['shortcode'] + ".txt"
            st.session_state.all_files.append(file)
            _write_transcript(file, video['formatted_transcript'])

        st.session_state.channel_scrapped_output = channel_scrapped_output
        status_container.success("Scraping complete! We shall now analyze the videos and report trends...")

    # Reserve a placeholder element outside the first spinner.
    st.empty()
    with st.spinner('The agent is analyzing the videos... This may take a moment.'):
        # create crew
        st.session_state.crew = create_agents_and_tasks()
        st.session_state.response = st.session_state.crew.kickoff(
            inputs={"file_paths": ", ".join(st.session_state.all_files)}
        )
172
+
173
+ # ===========================
174
+ # Sidebar
175
+ # ===========================
176
with st.sidebar:
    st.header("YouTube Channels")

    if "youtube_channels" not in st.session_state:
        st.session_state.youtube_channels = [""]  # Start with one empty field

    def add_channel_field():
        """Append a blank entry so the next run renders one more URL input."""
        st.session_state.youtube_channels.append("")

    # One text input per channel; every row except the first gets a remove
    # button.
    # NOTE(review): widgets are keyed by position (channel_{i}), so after a
    # pop() the remaining inputs may show stale per-key widget values -
    # confirm against Streamlit's widget-state behavior.
    for i, channel in enumerate(st.session_state.youtube_channels):
        col1, col2 = st.columns([6, 1])
        with col1:
            st.session_state.youtube_channels[i] = st.text_input(
                "Channel URL",
                value=channel,
                key=f"channel_{i}",
                label_visibility="collapsed"
            )
        with col2:
            if i > 0 and st.button("❌", key=f"remove_{i}"):
                st.session_state.youtube_channels.pop(i)
                st.rerun()

    st.button("Add Channel ➕", on_click=add_channel_field)

    st.divider()

    st.subheader("Date Range")
    col1, col2 = st.columns(2)
    # Dates are stored as "YYYY-MM-DD" strings, the form start_analysis
    # passes on to the scraper.
    with col1:
        start_date = st.date_input("Start Date")
        st.session_state.start_date = start_date.strftime("%Y-%m-%d")
    with col2:
        end_date = st.date_input("End Date")
        st.session_state.end_date = end_date.strftime("%Y-%m-%d")

    st.divider()
    st.button("Start Analysis 🚀", type="primary", on_click=start_analysis)
219
+
220
+ # ===========================
221
+ # Main Chat Interface
222
+ # ===========================
223
# Render the stored crew result, if any, together with a download button.
if st.session_state.response:
    with st.spinner('Generating content... This may take a moment.'):
        try:
            analysis = st.session_state.response
            st.markdown("### Generated Analysis")
            st.markdown(analysis)

            # Offer the raw crew output as a Markdown download
            download_options = {
                "label": "Download Content",
                "data": analysis.raw,
                "file_name": "youtube_trend_analysis.md",
                "mime": "text/markdown",
            }
            st.download_button(**download_options)
        except Exception as error:
            st.error(f"An error occurred: {error}")

# Footer
for footer_line in ("---", "Built with CrewAI, Crawl4AI and Streamlit"):
    st.markdown(footer_line)