# Import the necessary libraries
import os
import json
import pytest
import pandas as pd
from dynamicts.data_loader import DataLoader
from dynamicts.analysis import UnivariateAnalysis

# Dummy test cases
def test_null():
    assert True
    
# Constants
data_url = "https://raw.githubusercontent.com/Chinar-Quantum-AI-Ltd/public_datasets/main/bitcoin_price.csv"

loader = DataLoader(filepath=data_url, index_col="Date")

# Tests for the DataLoader
def test_load_success():
    df = loader.load()
    # check that a DataFrame was loaded
    assert isinstance(df, pd.DataFrame)
    # column names are lower case
    assert all(col == col.lower() for col in df.columns)
    # index name is lower cased
    assert df.index.name == "date"
    
def test_load_failure():
    url = "https://raw.githubusercontent.com/Chinar-Quantum-AI-Ltd/public_datasets/main/price.csv" # invalid url for testing
    loader = DataLoader(
        filepath=url, index_col="Date"
    )
    with pytest.raises(ValueError):
        loader.load()
        
def test_is_regular():
    loader.load()
    assert loader.is_regular() is True
    
def test_is_regular_false(tmp_path):
    # Create irregular CSV
    irregular = tmp_path / "irregular.csv"
    # create dummy irregular data
    dts = pd.to_datetime(["2021-01-01", "2021-01-02", "2021-01-04", "2021-01-07"]) 
    df_irreg = pd.DataFrame({"date": dts, "y": [1,2,3,4]}).set_index("date")
    df_irreg.to_csv(irregular)
    loader = DataLoader(filepath=str(irregular), index_col="date")
    loader.load()
    assert loader.is_regular() is False
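
# A complementary regular-frequency check, written as a sketch on local synthetic
# data (a daily date_range) so it does not depend on the remote CSV; it assumes
# is_regular() returns True whenever the datetime index has a constant frequency.
def test_is_regular_true_local(tmp_path):
    regular = tmp_path / "regular.csv"
    dts = pd.date_range("2021-01-01", periods=10, freq="D")
    df_reg = pd.DataFrame({"date": dts, "y": range(10)}).set_index("date")
    df_reg.to_csv(regular)
    local_loader = DataLoader(filepath=str(regular), index_col="date")
    local_loader.load()
    assert local_loader.is_regular() is True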
    
def test_save_metadata(tmp_path, monkeypatch):
    # Monkeypatch the working dir to the temp path so metadata is written to a clean location
    monkeypatch.chdir(tmp_path)
    df = loader.load()
    # Save metadata (writes to ./metadata/<filename>_meta.json)
    loader.save_metadata()

    # Verify expected file exists
    expected_filename = os.path.splitext(os.path.basename(data_url))[0] + "_meta.json"
    meta_path = tmp_path / "metadata" / expected_filename

    assert meta_path.exists()

    # Check metadata content
    with open(meta_path) as f:
        meta = json.load(f)

    assert meta["columns"] == list(df.columns)
    assert meta["shape"] == list(df.shape) or tuple(df.shape)
    assert meta["index_name"] == df.index.name

# Test cases for the analysis.py script

# Tests for the univariate analysis module
# def test_distribution_stats_and_missing():
#     analysis = UnivariateAnalysis(
#         filepath=data_url,
#         target_col="Close",
#         index_col="Date",
#     )
#     # test distribution stats
#     stats = analysis.check_distribution_stats()
#     assert "skewness" in stats
#     assert "kurtosis" in stats
#     assert isinstance(stats["skewness"], float)
#     assert isinstance(stats["kurtosis"], float)

#     # test missing values
#     missing = analysis.check_missing_values()
#     assert "missing_count" in missing
#     assert "missing_percentage" in missing
#     assert isinstance(missing["missing_percentage"], float)
    
# def test_outlier_detection_and_rolling():
#     analysis = UnivariateAnalysis(
#         filepath=data_url,
#         target_col="Close",
#         index_col="Date"
#     )
#     # tests for outlier detection
#     outliers = analysis.detect_outliers(method="both", plot=False)
#     assert "outliers_detected" in outliers
#     assert outliers["outliers_detected"] >= 0
    
#     # test for rolling stat
#     rolling = analysis.measure_rolling_statistics(window=7)
#     assert "rolling_mean" in rolling
#     assert isinstance(rolling["rolling_mean"], pd.Series)
#     assert rolling["rolling_mean"].shape == analysis.df["close"].shape