kritsadaK committed on
Commit
aa11c9e
·
1 Parent(s): 01eb191

add app.py

Browse files
Files changed (1) hide show
  1. app.py +94 -0
app.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ from statsmodels.tsa.arima.model import ARIMA
4
+ from sklearn.metrics import mean_squared_error
5
+ import numpy as np
6
+
7
# Function to load and combine data from the provided URLs
def load_data(urls):
    """Download every CSV in *urls* and merge them into one date-sorted frame.

    Each source is read with ``pd.read_csv(..., encoding='latin1')``. A source
    that cannot be read is reported in the Streamlit page and skipped, so one
    bad file does not discard the others.

    Args:
        urls: Iterable of CSV locations (anything ``pd.read_csv`` accepts).

    Returns:
        A DataFrame with the rows of all readable sources concatenated, the
        ``Date`` column parsed to datetimes (unparseable values become NaT)
        and the rows sorted by ``Date``. When no source could be read, an
        empty DataFrame holding only a datetime ``Date`` column is returned.

    Raises:
        KeyError: If data was loaded but it has no ``Date`` column.
    """
    dataframes = []
    for url in urls:
        try:
            dataframes.append(pd.read_csv(url, encoding='latin1'))
        except Exception as e:
            # Deliberate best-effort boundary: network, HTTP and parsing
            # failures are all shown to the user and the source is skipped.
            st.write(f"Error reading {url}: {e}")

    if not dataframes:
        # pd.concat([]) would raise an opaque "No objects to concatenate"
        # ValueError; hand back an empty, well-formed frame instead so the
        # caller can decide how to report the situation.
        return pd.DataFrame({'Date': pd.Series(dtype='datetime64[ns]')})

    combined_df = pd.concat(dataframes, ignore_index=True)
    combined_df['Date'] = pd.to_datetime(combined_df['Date'], errors='coerce')
    return combined_df.sort_values(by='Date')
20
+
21
# URLs to the CSV files.
# Every file is a resource of the same dataset on pcd.gdcatalog.go.th, so the
# shared prefix is written once and each entry only names its resource id and
# file name. The file names carry what look like Buddhist Era years
# (2554-2564) followed by 2022 and 2023 - presumably one file per year.
_PM10_RESOURCE_ROOT = (
    "https://pcd.gdcatalog.go.th/dataset/"
    "d86c62ec-b6e5-4577-82e3-6dee5e423634/resource"
)
_PM10_RESOURCE_FILES = (
    ("f5f7f32c-b56b-40f1-a513-d2ac6cd07d3a", "aqpm10thailand2554.csv"),
    ("279cdb5a-f881-4b42-953a-c626bcec95b7", "aqpm10thailand2555.csv"),
    ("15a0e00f-ba2f-477f-a5d5-11479c86e76f", "aqpm10thailand2556.csv"),
    ("5d19474f-8062-4321-a0f8-d34ecd92f19d", "aqpm10thailand2557.csv"),
    ("81ef4d84-920d-4e30-a008-605156b34ffc", "aqpm10thailand2558.csv"),
    ("c2cb7e5c-3990-4c2b-bd9b-69e82ea35c96", "aqpm10thailand2559.csv"),
    ("a9aa40f0-9ad0-4b2e-97c9-d1c92de8766a", "aqpm10thailand2560.csv"),
    ("cd40ac24-c553-476f-bf16-111046fdd3da", "aqpm10thailand2561.csv"),
    ("9e44b0dc-d9ee-4844-b9da-e8efb8002e11", "aqpm10thailand2562.csv"),
    ("85d1c2a5-4098-4829-86f5-282063bb07fe", "aqpm10thailand2563.csv"),
    ("d2895cde-77a1-4b62-b2c8-deafcee91dda", "aqpm10thailand2564.csv"),
    ("77407f63-3059-40f9-9411-c74b26a63e16", "pm102022-65.csv"),
    ("c4aa6dfb-e2ea-4b0c-a953-619e2bcb43af", "pm102023-66.csv"),
)
urls = [
    f"{_PM10_RESOURCE_ROOT}/{resource_id}/download/{file_name}"
    for resource_id, file_name in _PM10_RESOURCE_FILES
]
37
+
38
# ---------------------------------------------------------------------------
# Streamlit page: load the PM10 data, let the user pick one column, fit an
# ARIMA model on the first 80% of that column and show the forecast for the
# remaining 20% together with its RMSE.
#
# NOTE(review): Streamlit re-runs this whole script on every widget
# interaction, so all CSVs are downloaded again each time the selection
# changes. Consider wrapping load_data with st.cache_data once the deployed
# Streamlit version is confirmed to provide it.
# ---------------------------------------------------------------------------

# (p, d, q) order of the ARIMA model fitted below.
ARIMA_ORDER = (5, 1, 0)
# Share of the observations used for fitting; the rest is held out.
TRAIN_FRACTION = 0.8

# Load the data
st.title("Thailand PM10 Prediction App by using Basic ARIMA model")
st.write("ข้อมูลตรวจวัดคุณภาพอากาศจากสถานีตรวจวัดคุณภาพอากาศอัตโนมัติ (PM10) พื้นที่ทั่วประเทศ")
st.write("source of dataset: https://data.go.th/dataset/pm10")
combined_df = load_data(urls)
if combined_df.empty:
    # Nothing to model; stop instead of failing further down.
    st.error("No PM10 data could be loaded. Please try again later.")
    st.stop()
st.write("Data Loaded")

# Show the first few rows of the DataFrame.
# The URL list also contains the 2022 and 2023 files, so the caption names
# the full range rather than 2011-2021.
st.write("Sample Data (2011-2023)")
st.write(combined_df.head())

# Load and display the specific CSV file provided in the link
csv_url = "https://pcd.gdcatalog.go.th/dataset/d86c62ec-b6e5-4577-82e3-6dee5e423634/resource/9677d250-4d5d-40a4-a070-33182ffbec00/download/-2564.csv"
st.write("List of air quality monitoring stations:")
try:
    # 'utf-8-sig' for Thai language support (also strips a leading BOM).
    station_info_df = pd.read_csv(csv_url, encoding='utf-8-sig')
except Exception as e:
    # The station list is only informational: report the failure in the same
    # way load_data does and carry on with the forecast.
    station_info_df = pd.DataFrame()
    st.write(f"Error reading {csv_url}: {e}")
else:
    st.write(station_info_df)

# Allow the user to select a column for prediction.
# Exclude 'Date' by name rather than by position: after concatenating files
# the column order is not guaranteed to start with 'Date'.
candidate_columns = [col for col in combined_df.columns if col != 'Date']
column_to_predict = st.selectbox("Select a Time Series Column for Prediction and press ENTER", candidate_columns)
if column_to_predict is None:
    st.warning("The loaded data has no column that can be predicted.")
    st.stop()

# Prepare the data for the selected column: coerce it to numeric, drop rows
# where either the date or the value is missing, and index by date.
series = combined_df[['Date', column_to_predict]].copy()
series[column_to_predict] = pd.to_numeric(series[column_to_predict], errors='coerce')
series = series.dropna().set_index('Date')

# Split the data into training and testing sets
train_size = int(len(series) * TRAIN_FRACTION)
train, test = series[:train_size], series[train_size:]

# Heuristic floor: the model needs more training rows than its p + d + q
# terms, and at least one held-out row to score against.
min_train_size = sum(ARIMA_ORDER) + 1
if train_size < min_train_size or len(test) == 0:
    st.warning(
        f"Column '{column_to_predict}' has only {len(series)} usable numeric "
        "observations, which is not enough to fit and evaluate the model."
    )
    st.stop()

# Fit ARIMA model
model = ARIMA(train, order=ARIMA_ORDER)
model_fit = model.fit()

# Make predictions: one forecast step per held-out row, aligned to the
# held-out dates.
forecast = model_fit.forecast(steps=len(test))
predictions = pd.DataFrame({'Predicted_PM': np.asarray(forecast)}, index=test.index)

# Evaluate the model
mse = mean_squared_error(test, predictions)
rmse = np.sqrt(mse)
st.write(f"Root Mean Squared Error (RMSE): {rmse}")

# Display the results with Date and PM values
st.write("Predicted PM values with Dates:")
# The index is already named 'Date' (it comes from set_index('Date')), so
# reset_index() yields a 'Date' column directly and no rename is needed.
predictions = predictions.reset_index()
predictions['Date'] = predictions['Date'].dt.date  # Convert datetime to date
st.write(predictions)