Spaces:
Running
Running
feat: update
Browse files- .github/workflows/pipeline.yaml +81 -80
- Dockerfile +54 -24
- restful/onnx_utilities.py +121 -0
- restful/services.py +3 -2
.github/workflows/pipeline.yaml
CHANGED
@@ -7,8 +7,9 @@ on:
|
|
7 |
tags:
|
8 |
- '*'
|
9 |
schedule:
|
10 |
-
- cron: "0
|
11 |
-
#
|
|
|
12 |
|
13 |
jobs:
|
14 |
extraction_train_modeling:
|
@@ -44,90 +45,90 @@ jobs:
|
|
44 |
echo "match=false" >> $GITHUB_ENV
|
45 |
fi
|
46 |
|
47 |
-
- name: Scraping Yahoo Finance
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
- name: Install Libraries
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
- name: Modeling and Training
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
|
67 |
- name: Set Pipeline Schedule
|
68 |
if: env.match != 'true'
|
69 |
run: echo "$(date +'%Y-%m-%d')" > pipeline_schedule.ctl
|
70 |
|
71 |
-
- name: Zip Posttrained, Models, and Pickles
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
- name: Store Datasets to Google Drive
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
|
89 |
-
- name: Store Models to Google Drive
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
- name: Store Pickles to Google Drive
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
- name: Store Posttrained to Google Drive
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
- name: Remove Temporary Files and Directories
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
|
132 |
- name: Commit changes
|
133 |
if: env.match != 'true'
|
|
|
7 |
tags:
|
8 |
- '*'
|
9 |
schedule:
|
10 |
+
- cron: "0 22 * * *"
|
11 |
+
# 5 - 7 = [ -2 ]
|
12 |
+
# negative offset, so wrap around: 24 - 2 = [ 22 ] UTC
|
13 |
|
14 |
jobs:
|
15 |
extraction_train_modeling:
|
|
|
45 |
echo "match=false" >> $GITHUB_ENV
|
46 |
fi
|
47 |
|
48 |
+
# - name: Scraping Yahoo Finance
|
49 |
+
# if: env.match != 'true'
|
50 |
+
# run: |
|
51 |
+
# mkdir datasets
|
52 |
+
# wget https://github.com/belajarqywok/cryptocurrency_prediction/raw/main/postman/symbols.json \
|
53 |
+
# -O postman/symbols.json
|
54 |
+
# go run scraper.go
|
55 |
+
|
56 |
+
# - name: Install Libraries
|
57 |
+
# if: env.match != 'true'
|
58 |
+
# run: pip install -r requirements.txt
|
59 |
+
|
60 |
+
# - name: Modeling and Training
|
61 |
+
# if: env.match != 'true'
|
62 |
+
# run: |
|
63 |
+
# mkdir models
|
64 |
+
# mkdir pickles
|
65 |
+
# mkdir posttrained
|
66 |
+
# python training.py
|
67 |
|
68 |
- name: Set Pipeline Schedule
|
69 |
if: env.match != 'true'
|
70 |
run: echo "$(date +'%Y-%m-%d')" > pipeline_schedule.ctl
|
71 |
|
72 |
+
# - name: Zip Posttrained, Models, and Pickles
|
73 |
+
# if: env.match != 'true'
|
74 |
+
# run: |
|
75 |
+
# zip -r models.zip models
|
76 |
+
# zip -r pickles.zip pickles
|
77 |
+
# zip -r datasets.zip datasets
|
78 |
+
# zip -r posttrained.zip posttrained
|
79 |
+
|
80 |
+
# - name: Store Datasets to Google Drive
|
81 |
+
# if: env.match != 'true'
|
82 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
83 |
+
# with:
|
84 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
85 |
+
# filename: datasets.zip
|
86 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
87 |
+
# name: datasets.zip
|
88 |
+
# overwrite: "true"
|
89 |
|
90 |
+
# - name: Store Models to Google Drive
|
91 |
+
# if: env.match != 'true'
|
92 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
93 |
+
# with:
|
94 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
95 |
+
# filename: models.zip
|
96 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
97 |
+
# name: models.zip
|
98 |
+
# overwrite: "true"
|
99 |
+
|
100 |
+
# - name: Store Pickles to Google Drive
|
101 |
+
# if: env.match != 'true'
|
102 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
103 |
+
# with:
|
104 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
105 |
+
# filename: pickles.zip
|
106 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
107 |
+
# name: pickles.zip
|
108 |
+
# overwrite: "true"
|
109 |
+
|
110 |
+
# - name: Store Posttrained to Google Drive
|
111 |
+
# if: env.match != 'true'
|
112 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
113 |
+
# with:
|
114 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
115 |
+
# filename: posttrained.zip
|
116 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
117 |
+
# name: posttrained.zip
|
118 |
+
# overwrite: "true"
|
119 |
+
|
120 |
+
# - name: Remove Temporary Files and Directories
|
121 |
+
# if: env.match != 'true'
|
122 |
+
# run: |
|
123 |
+
# rm models.zip
|
124 |
+
# rm pickles.zip
|
125 |
+
# rm datasets.zip
|
126 |
+
# rm posttrained.zip
|
127 |
+
|
128 |
+
# rm -rf models
|
129 |
+
# rm -rf pickles
|
130 |
+
# rm -rf datasets
|
131 |
+
# rm -rf posttrained
|
132 |
|
133 |
- name: Commit changes
|
134 |
if: env.match != 'true'
|
Dockerfile
CHANGED
@@ -1,44 +1,74 @@
|
|
1 |
-
FROM python:3.
|
2 |
|
3 |
LABEL organization="R6Q - Infraprasta University"
|
4 |
-
LABEL team="Group 5"
|
5 |
|
6 |
RUN useradd -m -u 1000 user
|
7 |
|
8 |
WORKDIR /app
|
9 |
|
10 |
COPY --chown=user ./requirements.txt requirements.txt
|
11 |
-
|
12 |
-
|
|
|
|
|
13 |
|
14 |
COPY --chown=user . /app
|
15 |
|
16 |
-
RUN
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
RUN pip install cython
|
20 |
|
21 |
-
RUN cd /app/restful/cutils && \
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
RUN pip install gdown
|
26 |
|
27 |
-
RUN --mount=type=secret,id=MODELS_ID,mode=0444,required=true \
|
28 |
-
|
29 |
-
|
30 |
|
31 |
-
RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
|
40 |
-
|
41 |
-
|
42 |
|
43 |
|
44 |
-
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--workers", "10", "--port", "7860"]
|
|
|
1 |
+
FROM python:3.11-bullseye
|
2 |
|
3 |
LABEL organization="R6Q - Infraprasta University"
|
|
|
4 |
|
5 |
RUN useradd -m -u 1000 user
|
6 |
|
7 |
WORKDIR /app
|
8 |
|
9 |
COPY --chown=user ./requirements.txt requirements.txt
|
10 |
+
RUN apt-get update && \
|
11 |
+
apt-get install -y gcc python3-dev git git-lfs curl && \
|
12 |
+
pip install --no-cache-dir --upgrade -r requirements.txt && \
|
13 |
+
pip install cython onnxruntime==1.20.1
|
14 |
|
15 |
COPY --chown=user . /app
|
16 |
|
17 |
+
RUN git lfs install && \
|
18 |
+
git clone https://huggingface.co/datasets/qywok/indonesia_stocks && \
|
19 |
+
mkdir -p models && \
|
20 |
+
for i in $(seq 1 10); do \
|
21 |
+
git clone https://huggingface.co/qywok/stock_models_$i && \
|
22 |
+
cd stock_models_$i && git lfs pull && cd .. && \
|
23 |
+
mv stock_models_$i/*.onnx models/ && \
|
24 |
+
rm -rf stock_models_$i; \
|
25 |
+
done
|
26 |
+
|
27 |
+
RUN chmod -R 755 /app
|
28 |
+
|
29 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
|
30 |
+
|
31 |
+
# FROM python:3.9-bullseye
|
32 |
+
|
33 |
+
# LABEL organization="R6Q - Infraprasta University"
|
34 |
+
# LABEL team="Group 5"
|
35 |
+
|
36 |
+
# RUN useradd -m -u 1000 user
|
37 |
+
|
38 |
+
# WORKDIR /app
|
39 |
+
|
40 |
+
# COPY --chown=user ./requirements.txt requirements.txt
|
41 |
+
|
42 |
+
# RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
43 |
+
|
44 |
+
# COPY --chown=user . /app
|
45 |
+
|
46 |
+
# RUN apt-get update && \
|
47 |
+
# apt-get install -y gcc python3-dev gnupg curl
|
48 |
|
49 |
+
# RUN pip install cython
|
50 |
|
51 |
+
# RUN cd /app/restful/cutils && \
|
52 |
+
# python setup.py build_ext --inplace && \
|
53 |
+
# chmod 777 * && cd ../..
|
54 |
|
55 |
+
# RUN pip install gdown
|
56 |
|
57 |
+
# RUN --mount=type=secret,id=MODELS_ID,mode=0444,required=true \
|
58 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/MODELS_ID) && \
|
59 |
+
# unzip models.zip && rm models.zip
|
60 |
|
61 |
+
# RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
|
62 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/PICKLES_ID) && \
|
63 |
+
# unzip pickles.zip && rm pickles.zip
|
64 |
|
65 |
+
# RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
|
66 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/DATASETS_ID) && \
|
67 |
+
# unzip datasets.zip && rm datasets.zip
|
68 |
|
69 |
+
# RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
|
70 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/POSTTRAINED_ID) && \
|
71 |
+
# unzip posttrained.zip && rm posttrained.zip
|
72 |
|
73 |
|
74 |
+
# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--workers", "10", "--port", "7860"]
|
restful/onnx_utilities.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
import onnxruntime as ort
|
6 |
+
from numpy import append, expand_dims
|
7 |
+
from decimal import Decimal, ROUND_DOWN
|
8 |
+
from pandas import read_csv, to_datetime, Timedelta
|
9 |
+
|
10 |
+
class Utilities:
    """Helpers for serving stock-price forecasts from exported ONNX models.

    Paths set in ``__init__`` point at artifacts laid down at image build
    time (see Dockerfile): per-symbol ``.onnx`` models, the post-training
    CSV frames, and per-symbol min/max scaler JSON files.
    """

    def __init__(self) -> None:
        # Directory of <model_name>.onnx files.
        self.model_path = './models'
        # Directory of <model_name>.csv frames (Date-indexed, normalized).
        self.posttrained_path = './indonesia_stocks/modeling_datas'
        # Directory of <model_name>.json min/max scaler values.
        self.scaler_path = './indonesia_stocks/min_max'

    def truncate_2_decimal(self, val: float) -> float:
        """Truncate ``val`` toward zero to three decimal places.

        NOTE(review): despite the name, this quantizes to ``0.001`` (three
        decimals, not two); that live behavior is preserved for callers.
        Falls back to ``float(val)`` instead of raising, so a single bad
        value cannot fail a whole response.
        """
        try:
            return float(
                Decimal(str(float(val))).quantize(Decimal('0.001'), rounding=ROUND_DOWN)
            )
        except Exception as e:
            print("Decimal error:", e)
            return float(val)

    def denormalization(self, data, min_value, max_value):
        """Invert min-max scaling: map normalized ``data`` back to the
        original [min_value, max_value] range. Works element-wise on
        numpy arrays as well as scalars."""
        return (data * (max_value - min_value)) + min_value

    async def cryptocurrency_prediction_utils(self,
        days: int, sequence_length: int, model_name: str) -> tuple:
        """Autoregressively forecast ``days`` closing prices with the ONNX
        model named ``model_name``.

        Returns a ``(actuals, predictions)`` tuple, each a list of
        ``{'date': 'YYYY-MM-DD', 'price': float}`` dicts; ``([], [])`` when
        the model cannot be loaded.
        """
        model_path = os.path.join(self.model_path, f'{model_name}.onnx')
        try:
            session = ort.InferenceSession(model_path)
        except Exception as e:
            # Missing/corrupt model: degrade to an empty response rather
            # than surfacing a 500 from deep inside the service.
            print("ONNX model load error:", e)
            return [], []
        input_name = session.get_inputs()[0].name

        dataframe_path = os.path.join(self.posttrained_path, f'{model_name}.csv')
        dataframe = read_csv(dataframe_path, index_col='Date', parse_dates=True)

        scaler_path = os.path.join(self.scaler_path, f'{model_name}.json')
        with open(scaler_path, 'r') as f:
            scalers = json.load(f)

        min_close = scalers['min_value']['Close']
        max_close = scalers['max_value']['Close']

        # Seed the rolling window with the last `sequence_length` rows,
        # shaped (1, sequence_length, n_features) for the model input.
        lst_seq = dataframe[-sequence_length:].values
        lst_seq = expand_dims(lst_seq, axis=0)

        predicted_prices = {}
        last_date = to_datetime(dataframe.index[-1])

        for _ in range(days):
            predicted = session.run(None, {input_name: lst_seq.astype(np.float32)})[0]
            value = np.array(predicted).flatten()[0]
            # Skip NaN outputs entirely (no date advance, no window roll),
            # matching the previous behavior.
            if np.isnan(value):
                continue
            denorm_price = self.denormalization(value, min_close, max_close)
            if np.isnan(denorm_price):
                continue
            last_date = pd.to_datetime(last_date) + pd.Timedelta(days=1)
            predicted_prices[last_date] = self.truncate_2_decimal(denorm_price)
            # Slide the window one step and append the new (normalized)
            # prediction as the latest Close value.
            lst_seq = np.roll(lst_seq, shift=-1, axis=1)
            lst_seq[:, -1, -1] = value

        predictions = [
            {'date': date.strftime('%Y-%m-%d'), 'price': price}
            for date, price in predicted_prices.items()
        ]

        # Denormalize the trailing window of observed closes for the
        # "actuals" series returned alongside the forecast.
        df_date = dataframe.index[-sequence_length:]
        close_values = dataframe.iloc[-sequence_length:]['Close'].values
        close_denorm = self.denormalization(close_values, min_close, max_close)

        actuals = [
            {'date': to_datetime(date).strftime('%Y-%m-%d'), 'price': self.truncate_2_decimal(price)}
            for date, price in zip(df_date, close_denorm)
        ]

        return actuals, predictions
restful/services.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
-
from restful.cutils.utilities import Utilities
|
|
|
2 |
from restful.schemas import CryptocurrencyPredictionSchema
|
3 |
|
4 |
class cryptocurrency_svc:
|
@@ -16,4 +17,4 @@ class cryptocurrency_svc:
|
|
16 |
sequence_length = 60
|
17 |
)
|
18 |
|
19 |
-
return {'actuals': actuals, 'predictions': predictions}
|
|
|
1 |
+
# from restful.cutils.utilities import Utilities
|
2 |
+
from restful.onnx_utilities import Utilities
|
3 |
from restful.schemas import CryptocurrencyPredictionSchema
|
4 |
|
5 |
class cryptocurrency_svc:
|
|
|
17 |
sequence_length = 60
|
18 |
)
|
19 |
|
20 |
+
return {'actuals': actuals, 'predictions': predictions}
|