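"""Daily exchange-rate monitor.

Scrapes the exchange-rate table from https://www.mcb.com.lk/exchange-rates/,
appends one snapshot per day to data.csv, and serves the history through a
small Gradio interface.
"""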
import requests
import pandas as pd
from bs4 import BeautifulSoup
from datetime import datetime
import gradio as gr
import os
import schedule
import time
import threading
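
# Third-party dependencies (assumed to be listed in the Space's
# requirements.txt): requests, pandas, beautifulsoup4, lxml, gradio, schedule.
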
def scrape_exchange_rates():
    """Scrape the exchange-rate table from mcb.com.lk into a DataFrame, or return None on failure."""
    url = "https://www.mcb.com.lk/exchange-rates/"
    try:
        r = requests.get(url, timeout=10)
        print(f"Status code: {r.status_code}")
        if r.status_code != 200:
            print(f"Failed to fetch URL: Status code {r.status_code}")
            return None
        soup = BeautifulSoup(r.text, "lxml")
        table = soup.find("table", class_="uael-text-break uael-column-rules uael-table")
        if not table:
            print("Table not found in webpage")
            return None
        headers = ["Currency Type", "Buying Rate", "Selling Rate"]
        rows = []
        # Skip the first two <tr> elements (the table's title and header rows).
        for tr in table.find_all("tr")[2:]:
            cells = tr.find_all("td")
            if len(cells) >= 3:
                row = [
                    cells[0].text.strip(),
                    cells[1].text.strip(),
                    cells[2].text.strip()
                ]
                rows.append(row)
        if not rows:
            print("No data rows found in table")
            return None
        df = pd.DataFrame(rows, columns=headers)
        # Stamp every row with the date it was scraped, so update_csv can
        # detect whether today's data is already recorded.
        df['extracted_date'] = datetime.now().strftime('%Y-%m-%d')
        print("Scraping successful")
        return df
    except Exception as e:
        print(f"Scraping error: {e}")
        return None

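# Illustrative only (values below are hypothetical): on success the function
# returns a DataFrame shaped like
#
#   Currency Type   Buying Rate   Selling Rate   extracted_date
#   US Dollar       298.50        305.00         2024-01-15
#
# On any failure it returns None, which every caller must check for.
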
def update_csv():
    """Append today's rates to data.csv, creating the file on the first run."""
    csv_file = "data.csv"
    today = datetime.now().strftime('%Y-%m-%d')
    if not os.path.exists(csv_file):
        print("CSV file not found, creating new one")
        df = scrape_exchange_rates()
        if df is not None:
            df.to_csv(csv_file, index=False)
            print(f"Initial CSV created with data for {today}")
        else:
            print("Failed to create initial CSV - scraping returned no data")
        return
    try:
        existing_df = pd.read_csv(csv_file)
    except Exception as e:
        print(f"Error reading CSV: {e}")
        existing_df = pd.DataFrame(columns=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"])
    # Only scrape again if today's rates have not been recorded yet.
    if 'extracted_date' not in existing_df.columns or today not in existing_df['extracted_date'].values:
        print(f"Updating data for {today}")
        new_df = scrape_exchange_rates()
        if new_df is not None:
            updated_df = pd.concat([existing_df, new_df], ignore_index=True)
            updated_df.to_csv(csv_file, index=False)
            print(f"Data updated successfully for {today}")
        else:
            print("Failed to update data - scraping returned no data")

def run_schedule():
    """Register the daily job and run it forever; intended for a background thread."""
    schedule.every().day.at("14:00").do(update_csv)
    while True:
        schedule.run_pending()
        time.sleep(60)

def start_scheduler():
    """Run an immediate update, then start the daily scheduler in a daemon thread."""
    update_csv()
    scheduler_thread = threading.Thread(target=run_schedule, daemon=True)
    scheduler_thread.start()

def get_display_data():
    """Return (date of the last scrape, full history DataFrame) for the UI."""
    csv_file = "data.csv"
    if os.path.exists(csv_file):
        try:
            df = pd.read_csv(csv_file)
            if 'extracted_date' in df.columns and not df.empty:
                last_date = df['extracted_date'].iloc[-1]
                return last_date, df
        except Exception as e:
            print(f"Error in get_display_data: {e}")
    # Fallback for a missing, empty, or unreadable CSV.
    return "No data available yet", pd.DataFrame(columns=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"])

# Gradio interface
with gr.Blocks(title="Exchange Rate Monitor") as demo:
    gr.Markdown("# Exchange Rate Monitoring System")
    with gr.Row():
        last_update = gr.Textbox(label="Last Data Scraping Date", value="Loading...")
    with gr.Row():
        data_table = gr.Dataframe(
            label="Exchange Rates History",
            headers=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"],
            datatype=["str", "str", "str", "str"],
            interactive=False
        )
    refresh_btn = gr.Button("Refresh Data")

    def update_display():
        last_date, df = get_display_data()
        return last_date, df

    # Populate the display when the page loads and whenever the button is clicked.
    demo.load(
        fn=update_display,
        inputs=None,
        outputs=[last_update, data_table]
    )
    refresh_btn.click(
        fn=update_display,
        inputs=None,
        outputs=[last_update, data_table]
    )

# Start the scheduler once at process startup. Wiring it to demo.load (as the
# original did) would spawn a new scheduler thread on every page visit.
start_scheduler()

demo.launch(debug=True)