import asyncio
import os
import sys

# make the repo-local litellm / litellm_enterprise packages importable when
# running this file directly from the tests directory
sys.path.insert(0, os.path.abspath("../.."))

import pytest

import litellm

from litellm_enterprise.enterprise_callbacks.pagerduty.pagerduty import (
    PagerDutyAlerting,
    AlertingConfig,
)
from litellm.proxy._types import UserAPIKeyAuth


@pytest.mark.asyncio
async def test_pagerduty_alerting():
    """A single mocked RateLimitError should trip the alert when the
    failure threshold is 1 within a 10 second window."""
    pagerduty = PagerDutyAlerting(
        alerting_args=AlertingConfig(
            failure_threshold=1, failure_threshold_window_seconds=10
        )
    )
    litellm.callbacks = [pagerduty]

    try:
        # mock a provider rate-limit error so the failure path runs without a real API call
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
            mock_response="litellm.RateLimitError",
        )
    except litellm.RateLimitError:
        pass

    # give the async failure callback time to fire
    await asyncio.sleep(2)


@pytest.mark.asyncio
async def test_pagerduty_alerting_high_failure_rate():
    """Repeated mocked failures inside a 600 second window should cross the
    failure_threshold of 3 and trigger the high-failure-rate alert."""
    pagerduty = PagerDutyAlerting(
        alerting_args=AlertingConfig(
            failure_threshold=3, failure_threshold_window_seconds=600
        )
    )
    litellm.callbacks = [pagerduty]

    try:
        await litellm.acompletion(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": "hi"}],
            mock_response="litellm.RateLimitError",
        )
    except litellm.RateLimitError:
        pass

    await asyncio.sleep(2)

    # trigger three more failures to push the count past the threshold
    for _ in range(3):
        try:
            await litellm.acompletion(
                model="gpt-3.5-turbo",
                messages=[{"role": "user", "content": "hi"}],
                mock_response="litellm.RateLimitError",
            )
        except litellm.RateLimitError:
            pass

    # give the async failure callbacks time to fire
    await asyncio.sleep(2)


@pytest.mark.asyncio
async def test_pagerduty_hanging_request_alerting():
    """With a near-zero hanging_threshold_seconds, the pre-call hook should
    treat the request as hanging and raise a PagerDuty alert."""
    pagerduty = PagerDutyAlerting(
        alerting_args=AlertingConfig(hanging_threshold_seconds=0.0000001)
    )
    litellm.callbacks = [pagerduty]

    # register the request with the pre-call hook that tracks potentially hanging requests
    await pagerduty.async_pre_call_hook(
        cache=None,
        user_api_key_dict=UserAPIKeyAuth(
            api_key="test",
            key_alias="test-pagerduty",
            team_alias="test-team",
            org_id="test-org",
            user_id="test-user",
            end_user_id="test-end-user",
        ),
        data={"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]},
        call_type="completion",
    )

    await litellm.acompletion(
        model="gpt-4o",
        messages=[{"role": "user", "content": "hi"}],
    )

    # give the hanging-request check time to run
    await asyncio.sleep(1)