tebakaja commited on
Commit
85332cd
·
1 Parent(s): 5446c65

feat: update

Browse files
.github/workflows/pipeline.yaml CHANGED
@@ -7,8 +7,9 @@ on:
7
  tags:
8
  - '*'
9
  schedule:
10
- - cron: "0 13 * * *"
11
- # 20 - 7 = 13
 
12
 
13
  jobs:
14
  extraction_train_modeling:
@@ -44,90 +45,90 @@ jobs:
44
  echo "match=false" >> $GITHUB_ENV
45
  fi
46
 
47
- - name: Scraping Yahoo Finance
48
- if: env.match != 'true'
49
- run: |
50
- mkdir datasets
51
- wget https://github.com/belajarqywok/cryptocurrency_prediction/raw/main/postman/symbols.json \
52
- -O postman/symbols.json
53
- go run scraper.go
54
-
55
- - name: Install Libraries
56
- if: env.match != 'true'
57
- run: pip install -r requirements.txt
58
-
59
- - name: Modeling and Training
60
- if: env.match != 'true'
61
- run: |
62
- mkdir models
63
- mkdir pickles
64
- mkdir posttrained
65
- python training.py
66
 
67
  - name: Set Pipeline Schedule
68
  if: env.match != 'true'
69
  run: echo "$(date +'%Y-%m-%d')" > pipeline_schedule.ctl
70
 
71
- - name: Zip Posttrained, Models, and Pickles
72
- if: env.match != 'true'
73
- run: |
74
- zip -r models.zip models
75
- zip -r pickles.zip pickles
76
- zip -r datasets.zip datasets
77
- zip -r posttrained.zip posttrained
78
-
79
- - name: Store Datasets to Google Drive
80
- if: env.match != 'true'
81
- uses: adityak74/google-drive-upload-git-action@main
82
- with:
83
- credentials: ${{ secrets.GDRIVE_CRED }}
84
- filename: datasets.zip
85
- folderId: ${{ secrets.GDRIVE_ID }}
86
- name: datasets.zip
87
- overwrite: "true"
88
 
89
- - name: Store Models to Google Drive
90
- if: env.match != 'true'
91
- uses: adityak74/google-drive-upload-git-action@main
92
- with:
93
- credentials: ${{ secrets.GDRIVE_CRED }}
94
- filename: models.zip
95
- folderId: ${{ secrets.GDRIVE_ID }}
96
- name: models.zip
97
- overwrite: "true"
98
-
99
- - name: Store Pickles to Google Drive
100
- if: env.match != 'true'
101
- uses: adityak74/google-drive-upload-git-action@main
102
- with:
103
- credentials: ${{ secrets.GDRIVE_CRED }}
104
- filename: pickles.zip
105
- folderId: ${{ secrets.GDRIVE_ID }}
106
- name: pickles.zip
107
- overwrite: "true"
108
-
109
- - name: Store Posttrained to Google Drive
110
- if: env.match != 'true'
111
- uses: adityak74/google-drive-upload-git-action@main
112
- with:
113
- credentials: ${{ secrets.GDRIVE_CRED }}
114
- filename: posttrained.zip
115
- folderId: ${{ secrets.GDRIVE_ID }}
116
- name: posttrained.zip
117
- overwrite: "true"
118
-
119
- - name: Remove Temporarary Files and Directories
120
- if: env.match != 'true'
121
- run: |
122
- rm models.zip
123
- rm pickles.zip
124
- rm datasets.zip
125
- rm posttrained.zip
126
-
127
- rm -rf models
128
- rm -rf pickles
129
- rm -rf datasets
130
- rm -rf posttrained
131
 
132
  - name: Commit changes
133
  if: env.match != 'true'
 
7
  tags:
8
  - '*'
9
  schedule:
10
+ - cron: "0 22 * * *"
11
+ # 05:00 local (UTC+7) - 7h = [ -2 ]
12
+ # negative result wraps around: 24 - 2 = [ 22 ] (cron schedules run in UTC)
13
 
14
  jobs:
15
  extraction_train_modeling:
 
45
  echo "match=false" >> $GITHUB_ENV
46
  fi
47
 
48
+ # - name: Scraping Yahoo Finance
49
+ # if: env.match != 'true'
50
+ # run: |
51
+ # mkdir datasets
52
+ # wget https://github.com/belajarqywok/cryptocurrency_prediction/raw/main/postman/symbols.json \
53
+ # -O postman/symbols.json
54
+ # go run scraper.go
55
+
56
+ # - name: Install Libraries
57
+ # if: env.match != 'true'
58
+ # run: pip install -r requirements.txt
59
+
60
+ # - name: Modeling and Training
61
+ # if: env.match != 'true'
62
+ # run: |
63
+ # mkdir models
64
+ # mkdir pickles
65
+ # mkdir posttrained
66
+ # python training.py
67
 
68
  - name: Set Pipeline Schedule
69
  if: env.match != 'true'
70
  run: echo "$(date +'%Y-%m-%d')" > pipeline_schedule.ctl
71
 
72
+ # - name: Zip Posttrained, Models, and Pickles
73
+ # if: env.match != 'true'
74
+ # run: |
75
+ # zip -r models.zip models
76
+ # zip -r pickles.zip pickles
77
+ # zip -r datasets.zip datasets
78
+ # zip -r posttrained.zip posttrained
79
+
80
+ # - name: Store Datasets to Google Drive
81
+ # if: env.match != 'true'
82
+ # uses: adityak74/google-drive-upload-git-action@main
83
+ # with:
84
+ # credentials: ${{ secrets.GDRIVE_CRED }}
85
+ # filename: datasets.zip
86
+ # folderId: ${{ secrets.GDRIVE_ID }}
87
+ # name: datasets.zip
88
+ # overwrite: "true"
89
 
90
+ # - name: Store Models to Google Drive
91
+ # if: env.match != 'true'
92
+ # uses: adityak74/google-drive-upload-git-action@main
93
+ # with:
94
+ # credentials: ${{ secrets.GDRIVE_CRED }}
95
+ # filename: models.zip
96
+ # folderId: ${{ secrets.GDRIVE_ID }}
97
+ # name: models.zip
98
+ # overwrite: "true"
99
+
100
+ # - name: Store Pickles to Google Drive
101
+ # if: env.match != 'true'
102
+ # uses: adityak74/google-drive-upload-git-action@main
103
+ # with:
104
+ # credentials: ${{ secrets.GDRIVE_CRED }}
105
+ # filename: pickles.zip
106
+ # folderId: ${{ secrets.GDRIVE_ID }}
107
+ # name: pickles.zip
108
+ # overwrite: "true"
109
+
110
+ # - name: Store Posttrained to Google Drive
111
+ # if: env.match != 'true'
112
+ # uses: adityak74/google-drive-upload-git-action@main
113
+ # with:
114
+ # credentials: ${{ secrets.GDRIVE_CRED }}
115
+ # filename: posttrained.zip
116
+ # folderId: ${{ secrets.GDRIVE_ID }}
117
+ # name: posttrained.zip
118
+ # overwrite: "true"
119
+
120
+ # - name: Remove Temporary Files and Directories
121
+ # if: env.match != 'true'
122
+ # run: |
123
+ # rm models.zip
124
+ # rm pickles.zip
125
+ # rm datasets.zip
126
+ # rm posttrained.zip
127
+
128
+ # rm -rf models
129
+ # rm -rf pickles
130
+ # rm -rf datasets
131
+ # rm -rf posttrained
132
 
133
  - name: Commit changes
134
  if: env.match != 'true'
Dockerfile CHANGED
@@ -1,44 +1,74 @@
1
- FROM python:3.9-bullseye
2
 
3
  LABEL organization="R6Q - Infraprasta University"
4
- LABEL team="Group 5"
5
 
6
  RUN useradd -m -u 1000 user
7
 
8
  WORKDIR /app
9
 
10
  COPY --chown=user ./requirements.txt requirements.txt
11
-
12
- RUN pip install --no-cache-dir --upgrade -r requirements.txt
 
 
13
 
14
  COPY --chown=user . /app
15
 
16
- RUN apt-get update && \
17
- apt-get install -y gcc python3-dev gnupg curl
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
- RUN pip install cython
20
 
21
- RUN cd /app/restful/cutils && \
22
- python setup.py build_ext --inplace && \
23
- chmod 777 * && cd ../..
24
 
25
- RUN pip install gdown
26
 
27
- RUN --mount=type=secret,id=MODELS_ID,mode=0444,required=true \
28
- gdown https://drive.google.com/uc?id=$(cat /run/secrets/MODELS_ID) && \
29
- unzip models.zip && rm models.zip
30
 
31
- RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
32
- gdown https://drive.google.com/uc?id=$(cat /run/secrets/PICKLES_ID) && \
33
- unzip pickles.zip && rm pickles.zip
34
 
35
- RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
36
- gdown https://drive.google.com/uc?id=$(cat /run/secrets/DATASETS_ID) && \
37
- unzip datasets.zip && rm datasets.zip
38
 
39
- RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
40
- gdown https://drive.google.com/uc?id=$(cat /run/secrets/POSTTRAINED_ID) && \
41
- unzip posttrained.zip && rm posttrained.zip
42
 
43
 
44
- CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--workers", "10", "--port", "7860"]
 
1
+ FROM python:3.11-bullseye
2
 
3
  LABEL organization="R6Q - Infraprasta University"
 
4
 
5
  RUN useradd -m -u 1000 user
6
 
7
  WORKDIR /app
8
 
9
  COPY --chown=user ./requirements.txt requirements.txt
10
+ RUN apt-get update && \
11
+ apt-get install -y gcc python3-dev git git-lfs curl && \
12
+ pip install --no-cache-dir --upgrade -r requirements.txt && \
13
+ pip install cython onnxruntime==1.20.1
14
 
15
  COPY --chown=user . /app
16
 
17
+ RUN git lfs install && \
18
+ git clone https://huggingface.co/datasets/qywok/indonesia_stocks && \
19
+ mkdir -p models && \
20
+ for i in $(seq 1 10); do \
21
+ git clone https://huggingface.co/qywok/stock_models_$i && \
22
+ cd stock_models_$i && git lfs pull && cd .. && \
23
+ mv stock_models_$i/*.onnx models/ && \
24
+ rm -rf stock_models_$i; \
25
+ done
26
+
27
+ RUN chmod -R 755 /app
28
+
29
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
30
+
31
+ # FROM python:3.9-bullseye
32
+
33
+ # LABEL organization="R6Q - Infraprasta University"
34
+ # LABEL team="Group 5"
35
+
36
+ # RUN useradd -m -u 1000 user
37
+
38
+ # WORKDIR /app
39
+
40
+ # COPY --chown=user ./requirements.txt requirements.txt
41
+
42
+ # RUN pip install --no-cache-dir --upgrade -r requirements.txt
43
+
44
+ # COPY --chown=user . /app
45
+
46
+ # RUN apt-get update && \
47
+ # apt-get install -y gcc python3-dev gnupg curl
48
 
49
+ # RUN pip install cython
50
 
51
+ # RUN cd /app/restful/cutils && \
52
+ # python setup.py build_ext --inplace && \
53
+ # chmod 777 * && cd ../..
54
 
55
+ # RUN pip install gdown
56
 
57
+ # RUN --mount=type=secret,id=MODELS_ID,mode=0444,required=true \
58
+ # gdown https://drive.google.com/uc?id=$(cat /run/secrets/MODELS_ID) && \
59
+ # unzip models.zip && rm models.zip
60
 
61
+ # RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
62
+ # gdown https://drive.google.com/uc?id=$(cat /run/secrets/PICKLES_ID) && \
63
+ # unzip pickles.zip && rm pickles.zip
64
 
65
+ # RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
66
+ # gdown https://drive.google.com/uc?id=$(cat /run/secrets/DATASETS_ID) && \
67
+ # unzip datasets.zip && rm datasets.zip
68
 
69
+ # RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
70
+ # gdown https://drive.google.com/uc?id=$(cat /run/secrets/POSTTRAINED_ID) && \
71
+ # unzip posttrained.zip && rm posttrained.zip
72
 
73
 
74
+ # CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--workers", "10", "--port", "7860"]
restful/onnx_utilities.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import os
import json
import numpy as np
from decimal import Decimal, ROUND_DOWN
from pandas import read_csv, to_datetime, Timedelta


class Utilities:
    """Helpers for running ONNX stock-price models and shaping their output.

    The paths point at artifacts produced by the training pipeline:
    exported ONNX models, post-training CSV datasets, and per-symbol
    min/max scaler JSON files (cloned from the HF dataset repo — see
    Dockerfile; TODO confirm layout matches at deploy time).
    """

    def __init__(self) -> None:
        # Directory layout created at image build time.
        self.model_path = './models'
        self.posttrained_path = './indonesia_stocks/modeling_datas'
        self.scaler_path = './indonesia_stocks/min_max'

    def truncate_2_decimal(self, val: float) -> float:
        """Truncate *val* toward zero to three decimal places.

        NOTE(review): despite the name, this quantizes to 0.001 (three
        decimals, not two) — kept as-is because callers rely on it.
        Falls back to a plain float() conversion if Decimal parsing fails.
        """
        try:
            quantized = Decimal(str(float(val))).quantize(
                Decimal('0.001'), rounding=ROUND_DOWN)
            return float(quantized)
        except Exception as e:
            print("Decimal error:", e)
            return float(val)

    def denormalization(self, data, min_value, max_value):
        """Invert min-max scaling: map *data* from [0, 1] back to its range."""
        return (data * (max_value - min_value)) + min_value

    async def cryptocurrency_prediction_utils(self,
            days: int, sequence_length: int, model_name: str) -> tuple:
        """Run autoregressive inference for *model_name*.

        Loads the ONNX session, seeds it with the last *sequence_length*
        rows of the post-training dataset, then rolls the window forward
        *days* steps, feeding each prediction back into the sequence.

        Returns:
            (actuals, predictions): lists of {'date', 'price'} dicts.
            Both empty if the ONNX model cannot be loaded.
        """
        # Deferred import: the module stays importable (and unit-testable)
        # on hosts without the ONNX runtime installed.
        import onnxruntime as ort

        model_file = os.path.join(self.model_path, f'{model_name}.onnx')
        try:
            session = ort.InferenceSession(model_file)
        except Exception as e:
            print("ONNX model load error:", e)
            return [], []
        input_name = session.get_inputs()[0].name

        dataframe_path = os.path.join(self.posttrained_path, f'{model_name}.csv')
        dataframe = read_csv(dataframe_path, index_col='Date', parse_dates=True)

        scaler_path = os.path.join(self.scaler_path, f'{model_name}.json')
        with open(scaler_path, 'r') as f:
            scalers = json.load(f)

        min_close = scalers['min_value']['Close']
        max_close = scalers['max_value']['Close']

        # Seed window, batched to shape (1, sequence_length, n_features).
        lst_seq = np.expand_dims(dataframe[-sequence_length:].values, axis=0)

        predicted_prices = {}
        last_date = to_datetime(dataframe.index[-1])

        for _ in range(days):
            predicted = session.run(None, {input_name: lst_seq.astype(np.float32)})[0]
            value = np.array(predicted).flatten()[0]
            # A NaN output would only repeat on the unchanged window, so stop
            # instead of re-running inference on identical failing input.
            if np.isnan(value):
                break
            denorm_price = self.denormalization(value, min_close, max_close)
            if np.isnan(denorm_price):
                break
            last_date = last_date + Timedelta(days=1)
            predicted_prices[last_date] = self.truncate_2_decimal(denorm_price)
            # Slide the window: drop the oldest step, write the new prediction
            # into the last feature slot of the newest step.
            lst_seq = np.roll(lst_seq, shift=-1, axis=1)
            lst_seq[:, -1, -1] = value

        predictions = [
            {'date': date.strftime('%Y-%m-%d'), 'price': price}
            for date, price in predicted_prices.items()
        ]

        df_date = dataframe.index[-sequence_length:]
        close_values = dataframe.iloc[-sequence_length:]['Close'].values
        close_denorm = self.denormalization(close_values, min_close, max_close)

        actuals = [
            {'date': to_datetime(date).strftime('%Y-%m-%d'),
             'price': self.truncate_2_decimal(price)}
            for date, price in zip(df_date, close_denorm)
        ]

        return actuals, predictions
restful/services.py CHANGED
@@ -1,4 +1,5 @@
1
- from restful.cutils.utilities import Utilities
 
2
  from restful.schemas import CryptocurrencyPredictionSchema
3
 
4
  class cryptocurrency_svc:
@@ -16,4 +17,4 @@ class cryptocurrency_svc:
16
  sequence_length = 60
17
  )
18
 
19
- return {'actuals': actuals, 'predictions': predictions}
 
1
+ # from restful.cutils.utilities import Utilities
2
+ from restful.onnx_utilities import Utilities
3
  from restful.schemas import CryptocurrencyPredictionSchema
4
 
5
  class cryptocurrency_svc:
 
17
  sequence_length = 60
18
  )
19
 
20
+ return {'actuals': actuals, 'predictions': predictions}