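"""Daily exchange-rate monitor.

Scrapes the exchange-rate table from https://www.mcb.com.lk/exchange-rates/,
appends one snapshot per day to data.csv, and serves the history through a
small Gradio interface.
"""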
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import gradio as gr
import os
import schedule
import time
import threading
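
# Third-party dependencies (assumed to be listed in the Space's
# requirements.txt): requests, pandas, beautifulsoup4, lxml, gradio, schedule.
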
def scrape_exchange_rates():
    """Scrape the exchange-rate table from mcb.com.lk into a DataFrame, or return None on failure."""
    url = "https://www.mcb.com.lk/exchange-rates/"
    try:
        r = requests.get(url, timeout=10)
        print(f"Status code: {r.status_code}")
        if r.status_code != 200:
            print(f"Failed to fetch URL: Status code {r.status_code}")
            return None
        soup = BeautifulSoup(r.text, "lxml")
        table = soup.find("table", class_="uael-text-break uael-column-rules uael-table")
        if not table:
            print("Table not found in webpage")
            return None
        headers = ["Currency Type", "Buying Rate", "Selling Rate"]
        rows = []
        # Skip the first two <tr> elements (the table's title and header rows).
        for tr in table.find_all("tr")[2:]:
            cells = tr.find_all("td")
            if len(cells) >= 3:
                row = [
                    cells[0].text.strip(),
                    cells[1].text.strip(),
                    cells[2].text.strip()
                ]
                rows.append(row)
        if not rows:
            print("No data rows found in table")
            return None
        df = pd.DataFrame(rows, columns=headers)
        # Stamp every row with the date it was scraped, so update_csv can
        # detect whether today's data is already recorded.
        df['extracted_date'] = datetime.now().strftime('%Y-%m-%d')
        print("Scraping successful")
        return df
    except Exception as e:
        print(f"Scraping error: {e}")
        return None

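# Illustrative only (values below are hypothetical): on success the function
# returns a DataFrame shaped like
#
#   Currency Type   Buying Rate   Selling Rate   extracted_date
#   US Dollar       298.50        305.00         2024-01-15
#
# On any failure it returns None, which every caller must check for.
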
def update_csv():
    """Append today's rates to data.csv, creating the file on the first run."""
    csv_file = "data.csv"
    today = datetime.now().strftime('%Y-%m-%d')
    if not os.path.exists(csv_file):
        print("CSV file not found, creating new one")
        df = scrape_exchange_rates()
        if df is not None:
            df.to_csv(csv_file, index=False)
            print(f"Initial CSV created with data for {today}")
        else:
            print("Failed to create initial CSV - scraping returned no data")
        return
    try:
        existing_df = pd.read_csv(csv_file)
    except Exception as e:
        print(f"Error reading CSV: {e}")
        existing_df = pd.DataFrame(columns=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"])
    # Only scrape again if today's rates have not been recorded yet.
    if 'extracted_date' not in existing_df.columns or today not in existing_df['extracted_date'].values:
        print(f"Updating data for {today}")
        new_df = scrape_exchange_rates()
        if new_df is not None:
            updated_df = pd.concat([existing_df, new_df], ignore_index=True)
            updated_df.to_csv(csv_file, index=False)
            print(f"Data updated successfully for {today}")
        else:
            print("Failed to update data - scraping returned no data")

def run_schedule():
    """Register the daily job and run it forever; intended for a background thread."""
    schedule.every().day.at("14:00").do(update_csv)
    while True:
        schedule.run_pending()
        time.sleep(60)

def start_scheduler():
    """Run an immediate update, then start the daily scheduler in a daemon thread."""
    update_csv()
    scheduler_thread = threading.Thread(target=run_schedule, daemon=True)
    scheduler_thread.start()

def get_display_data():
    """Return (date of the last scrape, full history DataFrame) for the UI."""
    csv_file = "data.csv"
    if os.path.exists(csv_file):
        try:
            df = pd.read_csv(csv_file)
            if 'extracted_date' in df.columns and not df.empty:
                last_date = df['extracted_date'].iloc[-1]
                return last_date, df
        except Exception as e:
            print(f"Error in get_display_data: {e}")
    # Fallback for a missing, empty, or unreadable CSV.
    return "No data available yet", pd.DataFrame(columns=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"])

# Gradio interface
with gr.Blocks(title="Exchange Rate Monitor") as demo:
    gr.Markdown("# Exchange Rate Monitoring System")
    with gr.Row():
        last_update = gr.Textbox(label="Last Data Scraping Date", value="Loading...")
    with gr.Row():
        data_table = gr.Dataframe(
            label="Exchange Rates History",
            headers=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"],
            datatype=["str", "str", "str", "str"],
            interactive=False
        )
    refresh_btn = gr.Button("Refresh Data")

    def update_display():
        last_date, df = get_display_data()
        return last_date, df

    # Populate the display when the page loads and whenever the button is clicked.
    demo.load(
        fn=update_display,
        inputs=None,
        outputs=[last_update, data_table]
    )
    refresh_btn.click(
        fn=update_display,
        inputs=None,
        outputs=[last_update, data_table]
    )

# Start the scheduler once at process startup. Wiring it to demo.load (as the
# original did) would spawn a new scheduler thread on every page visit.
start_scheduler()

demo.launch(debug=True)