Spaces:
Running
Running
feat: update
Browse files- .github/workflows/pipeline.yaml +81 -80
- Dockerfile +54 -24
- restful/onnx_utilities.py +121 -0
- restful/services.py +3 -2
.github/workflows/pipeline.yaml
CHANGED
@@ -7,8 +7,9 @@ on:
|
|
7 |
tags:
|
8 |
- '*'
|
9 |
schedule:
|
10 |
-
- cron: "0
|
11 |
-
#
|
|
|
12 |
|
13 |
jobs:
|
14 |
extraction_train_modeling:
|
@@ -44,90 +45,90 @@ jobs:
|
|
44 |
echo "match=false" >> $GITHUB_ENV
|
45 |
fi
|
46 |
|
47 |
-
- name: Scraping Yahoo Finance
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
- name: Install Libraries
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
- name: Modeling and Training
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
|
67 |
- name: Set Pipeline Schedule
|
68 |
if: env.match != 'true'
|
69 |
run: echo "$(date +'%Y-%m-%d')" > pipeline_schedule.ctl
|
70 |
|
71 |
-
- name: Zip Posttrained, Models, and Pickles
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
- name: Store Datasets to Google Drive
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
84 |
-
|
85 |
-
|
86 |
-
|
87 |
-
|
88 |
|
89 |
-
- name: Store Models to Google Drive
|
90 |
-
|
91 |
-
|
92 |
-
|
93 |
-
|
94 |
-
|
95 |
-
|
96 |
-
|
97 |
-
|
98 |
-
|
99 |
-
- name: Store Pickles to Google Drive
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
- name: Store Posttrained to Google Drive
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
- name: Remove Temporary Files and Directories
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
|
132 |
- name: Commit changes
|
133 |
if: env.match != 'true'
|
|
|
7 |
tags:
|
8 |
- '*'
|
9 |
schedule:
|
10 |
+
- cron: "0 22 * * *"
|
11 |
+
# 5 - 7 = [ -2 ]
|
12 |
+
# negative offset, so wrap around: 24 - 2 = [ 22 ] UTC
|
13 |
|
14 |
jobs:
|
15 |
extraction_train_modeling:
|
|
|
45 |
echo "match=false" >> $GITHUB_ENV
|
46 |
fi
|
47 |
|
48 |
+
# - name: Scraping Yahoo Finance
|
49 |
+
# if: env.match != 'true'
|
50 |
+
# run: |
|
51 |
+
# mkdir datasets
|
52 |
+
# wget https://github.com/belajarqywok/cryptocurrency_prediction/raw/main/postman/symbols.json \
|
53 |
+
# -O postman/symbols.json
|
54 |
+
# go run scraper.go
|
55 |
+
|
56 |
+
# - name: Install Libraries
|
57 |
+
# if: env.match != 'true'
|
58 |
+
# run: pip install -r requirements.txt
|
59 |
+
|
60 |
+
# - name: Modeling and Training
|
61 |
+
# if: env.match != 'true'
|
62 |
+
# run: |
|
63 |
+
# mkdir models
|
64 |
+
# mkdir pickles
|
65 |
+
# mkdir posttrained
|
66 |
+
# python training.py
|
67 |
|
68 |
- name: Set Pipeline Schedule
|
69 |
if: env.match != 'true'
|
70 |
run: echo "$(date +'%Y-%m-%d')" > pipeline_schedule.ctl
|
71 |
|
72 |
+
# - name: Zip Posttrained, Models, and Pickles
|
73 |
+
# if: env.match != 'true'
|
74 |
+
# run: |
|
75 |
+
# zip -r models.zip models
|
76 |
+
# zip -r pickles.zip pickles
|
77 |
+
# zip -r datasets.zip datasets
|
78 |
+
# zip -r posttrained.zip posttrained
|
79 |
+
|
80 |
+
# - name: Store Datasets to Google Drive
|
81 |
+
# if: env.match != 'true'
|
82 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
83 |
+
# with:
|
84 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
85 |
+
# filename: datasets.zip
|
86 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
87 |
+
# name: datasets.zip
|
88 |
+
# overwrite: "true"
|
89 |
|
90 |
+
# - name: Store Models to Google Drive
|
91 |
+
# if: env.match != 'true'
|
92 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
93 |
+
# with:
|
94 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
95 |
+
# filename: models.zip
|
96 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
97 |
+
# name: models.zip
|
98 |
+
# overwrite: "true"
|
99 |
+
|
100 |
+
# - name: Store Pickles to Google Drive
|
101 |
+
# if: env.match != 'true'
|
102 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
103 |
+
# with:
|
104 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
105 |
+
# filename: pickles.zip
|
106 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
107 |
+
# name: pickles.zip
|
108 |
+
# overwrite: "true"
|
109 |
+
|
110 |
+
# - name: Store Posttrained to Google Drive
|
111 |
+
# if: env.match != 'true'
|
112 |
+
# uses: adityak74/google-drive-upload-git-action@main
|
113 |
+
# with:
|
114 |
+
# credentials: ${{ secrets.GDRIVE_CRED }}
|
115 |
+
# filename: posttrained.zip
|
116 |
+
# folderId: ${{ secrets.GDRIVE_ID }}
|
117 |
+
# name: posttrained.zip
|
118 |
+
# overwrite: "true"
|
119 |
+
|
120 |
+
# - name: Remove Temporary Files and Directories
|
121 |
+
# if: env.match != 'true'
|
122 |
+
# run: |
|
123 |
+
# rm models.zip
|
124 |
+
# rm pickles.zip
|
125 |
+
# rm datasets.zip
|
126 |
+
# rm posttrained.zip
|
127 |
+
|
128 |
+
# rm -rf models
|
129 |
+
# rm -rf pickles
|
130 |
+
# rm -rf datasets
|
131 |
+
# rm -rf posttrained
|
132 |
|
133 |
- name: Commit changes
|
134 |
if: env.match != 'true'
|
Dockerfile
CHANGED
@@ -1,44 +1,74 @@
|
|
1 |
-
FROM python:3.
|
2 |
|
3 |
LABEL organization="R6Q - Infraprasta University"
|
4 |
-
LABEL team="Group 5"
|
5 |
|
6 |
RUN useradd -m -u 1000 user
|
7 |
|
8 |
WORKDIR /app
|
9 |
|
10 |
COPY --chown=user ./requirements.txt requirements.txt
|
11 |
-
|
12 |
-
|
|
|
|
|
13 |
|
14 |
COPY --chown=user . /app
|
15 |
|
16 |
-
RUN
|
17 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
-
RUN pip install cython
|
20 |
|
21 |
-
RUN cd /app/restful/cutils && \
|
22 |
-
|
23 |
-
|
24 |
|
25 |
-
RUN pip install gdown
|
26 |
|
27 |
-
RUN --mount=type=secret,id=MODELS_ID,mode=0444,required=true \
|
28 |
-
|
29 |
-
|
30 |
|
31 |
-
RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
|
32 |
-
|
33 |
-
|
34 |
|
35 |
-
RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
|
36 |
-
|
37 |
-
|
38 |
|
39 |
-
RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
|
40 |
-
|
41 |
-
|
42 |
|
43 |
|
44 |
-
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--workers", "10", "--port", "7860"]
|
|
|
1 |
+
FROM python:3.11-bullseye
|
2 |
|
3 |
LABEL organization="R6Q - Infraprasta University"
|
|
|
4 |
|
5 |
RUN useradd -m -u 1000 user
|
6 |
|
7 |
WORKDIR /app
|
8 |
|
9 |
COPY --chown=user ./requirements.txt requirements.txt
|
10 |
+
RUN apt-get update && \
|
11 |
+
apt-get install -y gcc python3-dev git git-lfs curl && \
|
12 |
+
pip install --no-cache-dir --upgrade -r requirements.txt && \
|
13 |
+
pip install cython onnxruntime==1.20.1
|
14 |
|
15 |
COPY --chown=user . /app
|
16 |
|
17 |
+
RUN git lfs install && \
|
18 |
+
git clone https://huggingface.co/datasets/qywok/indonesia_stocks && \
|
19 |
+
mkdir -p models && \
|
20 |
+
for i in $(seq 1 10); do \
|
21 |
+
git clone https://huggingface.co/qywok/stock_models_$i && \
|
22 |
+
cd stock_models_$i && git lfs pull && cd .. && \
|
23 |
+
mv stock_models_$i/*.onnx models/ && \
|
24 |
+
rm -rf stock_models_$i; \
|
25 |
+
done
|
26 |
+
|
27 |
+
RUN chmod -R 755 /app
|
28 |
+
|
29 |
+
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1"]
|
30 |
+
|
31 |
+
# FROM python:3.9-bullseye
|
32 |
+
|
33 |
+
# LABEL organization="R6Q - Infraprasta University"
|
34 |
+
# LABEL team="Group 5"
|
35 |
+
|
36 |
+
# RUN useradd -m -u 1000 user
|
37 |
+
|
38 |
+
# WORKDIR /app
|
39 |
+
|
40 |
+
# COPY --chown=user ./requirements.txt requirements.txt
|
41 |
+
|
42 |
+
# RUN pip install --no-cache-dir --upgrade -r requirements.txt
|
43 |
+
|
44 |
+
# COPY --chown=user . /app
|
45 |
+
|
46 |
+
# RUN apt-get update && \
|
47 |
+
# apt-get install -y gcc python3-dev gnupg curl
|
48 |
|
49 |
+
# RUN pip install cython
|
50 |
|
51 |
+
# RUN cd /app/restful/cutils && \
|
52 |
+
# python setup.py build_ext --inplace && \
|
53 |
+
# chmod 777 * && cd ../..
|
54 |
|
55 |
+
# RUN pip install gdown
|
56 |
|
57 |
+
# RUN --mount=type=secret,id=MODELS_ID,mode=0444,required=true \
|
58 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/MODELS_ID) && \
|
59 |
+
# unzip models.zip && rm models.zip
|
60 |
|
61 |
+
# RUN --mount=type=secret,id=PICKLES_ID,mode=0444,required=true \
|
62 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/PICKLES_ID) && \
|
63 |
+
# unzip pickles.zip && rm pickles.zip
|
64 |
|
65 |
+
# RUN --mount=type=secret,id=DATASETS_ID,mode=0444,required=true \
|
66 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/DATASETS_ID) && \
|
67 |
+
# unzip datasets.zip && rm datasets.zip
|
68 |
|
69 |
+
# RUN --mount=type=secret,id=POSTTRAINED_ID,mode=0444,required=true \
|
70 |
+
# gdown https://drive.google.com/uc?id=$(cat /run/secrets/POSTTRAINED_ID) && \
|
71 |
+
# unzip posttrained.zip && rm posttrained.zip
|
72 |
|
73 |
|
74 |
+
# CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--workers", "10", "--port", "7860"]
|
restful/onnx_utilities.py
ADDED
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
|
2 |
+
import json
|
3 |
+
import numpy as np
|
4 |
+
import pandas as pd
|
5 |
+
import onnxruntime as ort
|
6 |
+
from numpy import append, expand_dims
|
7 |
+
from decimal import Decimal, ROUND_DOWN
|
8 |
+
from pandas import read_csv, to_datetime, Timedelta
|
9 |
+
|
10 |
+
class Utilities:
    """Helpers for serving stock-price forecasts from exported ONNX models.

    Paths set in ``__init__`` point at artifacts laid down at image build
    time (see Dockerfile): per-symbol ``.onnx`` models, the post-training
    CSV frames, and per-symbol min/max scaler JSON files.
    """

    def __init__(self) -> None:
        # Directory of <model_name>.onnx files.
        self.model_path = './models'
        # Directory of <model_name>.csv frames (Date-indexed, normalized).
        self.posttrained_path = './indonesia_stocks/modeling_datas'
        # Directory of <model_name>.json min/max scaler values.
        self.scaler_path = './indonesia_stocks/min_max'

    def truncate_2_decimal(self, val: float) -> float:
        """Truncate ``val`` toward zero to three decimal places.

        NOTE(review): despite the name, this quantizes to ``0.001`` (three
        decimals, not two); that live behavior is preserved for callers.
        Falls back to ``float(val)`` instead of raising, so a single bad
        value cannot fail a whole response.
        """
        try:
            return float(
                Decimal(str(float(val))).quantize(Decimal('0.001'), rounding=ROUND_DOWN)
            )
        except Exception as e:
            print("Decimal error:", e)
            return float(val)

    def denormalization(self, data, min_value, max_value):
        """Invert min-max scaling: map normalized ``data`` back to the
        original [min_value, max_value] range. Works element-wise on
        numpy arrays as well as scalars."""
        return (data * (max_value - min_value)) + min_value

    async def cryptocurrency_prediction_utils(self,
        days: int, sequence_length: int, model_name: str) -> tuple:
        """Autoregressively forecast ``days`` closing prices with the ONNX
        model named ``model_name``.

        Returns a ``(actuals, predictions)`` tuple, each a list of
        ``{'date': 'YYYY-MM-DD', 'price': float}`` dicts; ``([], [])`` when
        the model cannot be loaded.
        """
        model_path = os.path.join(self.model_path, f'{model_name}.onnx')
        try:
            session = ort.InferenceSession(model_path)
        except Exception as e:
            # Missing/corrupt model: degrade to an empty response rather
            # than surfacing a 500 from deep inside the service.
            print("ONNX model load error:", e)
            return [], []
        input_name = session.get_inputs()[0].name

        dataframe_path = os.path.join(self.posttrained_path, f'{model_name}.csv')
        dataframe = read_csv(dataframe_path, index_col='Date', parse_dates=True)

        scaler_path = os.path.join(self.scaler_path, f'{model_name}.json')
        with open(scaler_path, 'r') as f:
            scalers = json.load(f)

        min_close = scalers['min_value']['Close']
        max_close = scalers['max_value']['Close']

        # Seed the rolling window with the last `sequence_length` rows,
        # shaped (1, sequence_length, n_features) for the model input.
        lst_seq = dataframe[-sequence_length:].values
        lst_seq = expand_dims(lst_seq, axis=0)

        predicted_prices = {}
        last_date = to_datetime(dataframe.index[-1])

        for _ in range(days):
            predicted = session.run(None, {input_name: lst_seq.astype(np.float32)})[0]
            value = np.array(predicted).flatten()[0]
            # Skip NaN outputs entirely (no date advance, no window roll),
            # matching the previous behavior.
            if np.isnan(value):
                continue
            denorm_price = self.denormalization(value, min_close, max_close)
            if np.isnan(denorm_price):
                continue
            last_date = pd.to_datetime(last_date) + pd.Timedelta(days=1)
            predicted_prices[last_date] = self.truncate_2_decimal(denorm_price)
            # Slide the window one step and append the new (normalized)
            # prediction as the latest Close value.
            lst_seq = np.roll(lst_seq, shift=-1, axis=1)
            lst_seq[:, -1, -1] = value

        predictions = [
            {'date': date.strftime('%Y-%m-%d'), 'price': price}
            for date, price in predicted_prices.items()
        ]

        # Denormalize the trailing window of observed closes for the
        # "actuals" series returned alongside the forecast.
        df_date = dataframe.index[-sequence_length:]
        close_values = dataframe.iloc[-sequence_length:]['Close'].values
        close_denorm = self.denormalization(close_values, min_close, max_close)

        actuals = [
            {'date': to_datetime(date).strftime('%Y-%m-%d'), 'price': self.truncate_2_decimal(price)}
            for date, price in zip(df_date, close_denorm)
        ]

        return actuals, predictions
restful/services.py
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
-
from restful.cutils.utilities import Utilities
|
|
|
2 |
from restful.schemas import CryptocurrencyPredictionSchema
|
3 |
|
4 |
class cryptocurrency_svc:
|
@@ -16,4 +17,4 @@ class cryptocurrency_svc:
|
|
16 |
sequence_length = 60
|
17 |
)
|
18 |
|
19 |
-
return {'actuals': actuals, 'predictions': predictions}
|
|
|
1 |
+
# from restful.cutils.utilities import Utilities
|
2 |
+
from restful.onnx_utilities import Utilities
|
3 |
from restful.schemas import CryptocurrencyPredictionSchema
|
4 |
|
5 |
class cryptocurrency_svc:
|
|
|
17 |
sequence_length = 60
|
18 |
)
|
19 |
|
20 |
+
return {'actuals': actuals, 'predictions': predictions}
|