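# Exchange-rate monitor for a Hugging Face Space: scrapes the daily rates
# from mcb.com.lk, appends them to data.csv, and serves the history through
# a Gradio UI. Requires: requests, pandas, beautifulsoup4, lxml, gradio, schedule.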
import os
import threading
import time
from datetime import datetime

import gradio as gr
import pandas as pd
import requests
import schedule
from bs4 import BeautifulSoup

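# Scrape the exchange-rate table from the MCB website into a pandas DataFrame.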
def scrape_exchange_rates():
    url = "https://www.mcb.com.lk/exchange-rates/"
    try:
        r = requests.get(url, timeout=10)
        print(f"Status code: {r.status_code}")
        if r.status_code != 200:
            print(f"Failed to fetch URL: Status code {r.status_code}")
            return None
        soup = BeautifulSoup(r.text, "lxml")
        table = soup.find("table", class_="uael-text-break uael-column-rules uael-table")
        if not table:
            print("Table not found in webpage")
            return None
        headers = ["Currency Type", "Buying Rate", "Selling Rate"]
        rows = []
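        # The first two <tr> rows are the table header, so skip them.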
        for tr in table.find_all("tr")[2:]:
            cells = tr.find_all("td")
            if len(cells) >= 3:
                row = [
                    cells[0].text.strip(),
                    cells[1].text.strip(),
                    cells[2].text.strip(),
                ]
                rows.append(row)
        if not rows:
            print("No data rows found in table")
            return None
        df = pd.DataFrame(rows, columns=headers)
        df['extracted_date'] = datetime.now().strftime('%Y-%m-%d')
        print("Scraping successful")
        return df
    except Exception as e:
        print(f"Scraping error: {e}")
        return None

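# Append today's rates to data.csv, creating the file on the first run.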
def update_csv():
    csv_file = "data.csv"
    today = datetime.now().strftime('%Y-%m-%d')
    if not os.path.exists(csv_file):
        print("CSV file not found, creating new one")
        df = scrape_exchange_rates()
        if df is not None:
            df.to_csv(csv_file, index=False)
            print(f"Initial CSV created with data for {today}")
        else:
            print("Failed to create initial CSV - scraping returned no data")
        return
    try:
        existing_df = pd.read_csv(csv_file)
    except Exception as e:
        print(f"Error reading CSV: {e}")
        existing_df = pd.DataFrame(columns=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"])
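    # Scrape again only if today's date is not already recorded in the CSV.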
    if 'extracted_date' not in existing_df.columns or today not in existing_df['extracted_date'].values:
        print(f"Updating data for {today}")
        new_df = scrape_exchange_rates()
        if new_df is not None:
            updated_df = pd.concat([existing_df, new_df], ignore_index=True)
            updated_df.to_csv(csv_file, index=False)
            print(f"Data updated successfully for {today}")
        else:
            print("Failed to update data - scraping returned no data")

def run_schedule():
    schedule.every().day.at("14:00").do(update_csv)
    while True:
        schedule.run_pending()
        time.sleep(60)

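# Run an immediate update, then keep the scheduler alive in a daemon thread.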
def start_scheduler():
    update_csv()
    scheduler_thread = threading.Thread(target=run_schedule, daemon=True)
    scheduler_thread.start()

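# Read data.csv and return the most recent scrape date plus the full history.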
def get_display_data():
    csv_file = "data.csv"
    if os.path.exists(csv_file):
        try:
            df = pd.read_csv(csv_file)
            if 'extracted_date' in df.columns and not df.empty:
                last_date = df['extracted_date'].iloc[-1]
                return last_date, df
        except Exception as e:
            print(f"Error in get_display_data: {e}")
    return "No data available yet", pd.DataFrame(columns=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"])

# Gradio interface
with gr.Blocks(title="Exchange Rate Monitor") as demo:
    gr.Markdown("# Exchange Rate Monitoring System")
    with gr.Row():
        last_update = gr.Textbox(label="Last Data Scraping Date", value="Loading...")
    with gr.Row():
        data_table = gr.Dataframe(
            label="Exchange Rates History",
            headers=["Currency Type", "Buying Rate", "Selling Rate", "extracted_date"],
            datatype=["str", "str", "str", "str"],
            interactive=False
        )
    refresh_btn = gr.Button("Refresh Data")
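    # Refresh both the date textbox and the history table from the CSV.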
    def update_display():
        last_date, df = get_display_data()
        return last_date, df

    demo.load(
        fn=update_display,
        inputs=None,
        outputs=[last_update, data_table]
    )
    refresh_btn.click(
        fn=update_display,
        inputs=None,
        outputs=[last_update, data_table]
    )

# Start the background scheduler once at startup; attaching it to demo.load
# would spawn a fresh scheduler thread (and a redundant scrape) on every page load.
start_scheduler()

demo.launch(debug=True)