"""
DataDog Integration - sends logs to /api/v2/logs

DD Reference API: https://docs.datadoghq.com/api/latest/logs

`async_log_success_event` - used by the litellm proxy to send logs to Datadog
`log_success_event` - sync version of logging to Datadog, only used by the litellm Python SDK when the user opts in to sync callbacks

async_log_success_event: stores logs in an in-memory queue and flushes them to Datadog once the queue reaches DD_MAX_BATCH_SIZE entries, or on the periodic flush interval

async_service_failure_hook: logs failures from adjacent systems (Redis, Postgres) as 'WARNING' on Datadog

For batching-specific details, see the CustomBatchLogger class
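
Example usage (a minimal sketch; assumes DD_API_KEY / DD_SITE are set in the
environment and uses litellm's standard callback config):

    import litellm

    litellm.success_callback = ["datadog"]
    # subsequent litellm.completion() / litellm.acompletion() calls are batched to Datadog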
"""

import asyncio
import datetime
import json
import os
import traceback
import uuid
from datetime import datetime as datetimeObj
from typing import Any, List, Optional, Union

import httpx
from httpx import Response

import litellm
from litellm._logging import verbose_logger
from litellm.integrations.custom_batch_logger import CustomBatchLogger
from litellm.llms.custom_httpx.http_handler import (
    _get_httpx_client,
    get_async_httpx_client,
    httpxSpecialProvider,
)
from litellm.types.integrations.base_health_check import IntegrationHealthCheckStatus
from litellm.types.integrations.datadog import *
from litellm.types.services import ServiceLoggerPayload
from litellm.types.utils import StandardLoggingPayload

from ..base_health_check import HealthCheckIntegration

DD_MAX_BATCH_SIZE = 1000  # max number of log items the DD /api/v2/logs endpoint accepts per request


class DataDogLogger(
    CustomBatchLogger,
    HealthCheckIntegration,
):
    def __init__(
        self,
        **kwargs,
    ):
        """
        Initializes the datadog logger, checks if the correct env variables are set

        Required environment variables:
        `DD_API_KEY` - your datadog api key
        `DD_SITE` - your datadog site, example = `"us5.datadoghq.com"`
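
        Example (illustrative placeholder values):
            os.environ["DD_API_KEY"] = "<your-api-key>"
            os.environ["DD_SITE"] = "us5.datadoghq.com"
            logger = DataDogLogger()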
        """
        try:
            verbose_logger.debug("Datadog: in init datadog logger")
            # check if the correct env variables are set
            if os.getenv("DD_API_KEY", None) is None:
                raise Exception("DD_API_KEY is not set, set 'DD_API_KEY=<>")
            if os.getenv("DD_SITE", None) is None:
                raise Exception("DD_SITE is not set in .env, set 'DD_SITE=<>")
            self.async_client = get_async_httpx_client(
                llm_provider=httpxSpecialProvider.LoggingCallback
            )
            self.DD_API_KEY = os.getenv("DD_API_KEY")
            self.intake_url = (
                f"https://http-intake.logs.{os.getenv('DD_SITE')}/api/v2/logs"
            )

            ###################################
            # OPTIONAL - only used for testing
            dd_base_url: Optional[str] = (
                os.getenv("_DATADOG_BASE_URL")
                or os.getenv("DATADOG_BASE_URL")
                or os.getenv("DD_BASE_URL")
            )
            if dd_base_url is not None:
                self.intake_url = f"{dd_base_url}/api/v2/logs"
            ###################################
            self.sync_client = _get_httpx_client()
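            # NOTE: asyncio.create_task requires a running event loop; this logger is
            # expected to be constructed from within an async context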
            asyncio.create_task(self.periodic_flush())
            self.flush_lock = asyncio.Lock()
            super().__init__(
                **kwargs, flush_lock=self.flush_lock, batch_size=DD_MAX_BATCH_SIZE
            )
        except Exception as e:
            verbose_logger.exception(
                f"Datadog: Got exception on init Datadog client {str(e)}"
            )
            raise e

    async def async_log_success_event(self, kwargs, response_obj, start_time, end_time):
        """
        Async log success events to Datadog

        - Creates a Datadog payload
        - Adds the payload to the in-memory log queue
        - The queue is flushed on the periodic flush interval, or immediately once it reaches DD_MAX_BATCH_SIZE entries

        Raises:
            Nothing - errors are logged as non-blocking via verbose_logger.exception
        """
        try:
            verbose_logger.debug(
                "Datadog: Logging - entered success logging function, kwargs: %s", kwargs
            )
            await self._log_async_event(kwargs, response_obj, start_time, end_time)

        except Exception as e:
            verbose_logger.exception(
                f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
            )

    async def async_log_failure_event(self, kwargs, response_obj, start_time, end_time):
        try:
            verbose_logger.debug(
                "Datadog: Logging - entered failure logging function, kwargs: %s", kwargs
            )
            await self._log_async_event(kwargs, response_obj, start_time, end_time)

        except Exception as e:
            verbose_logger.exception(
                f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
            )

    async def async_send_batch(self):
        """
        Sends the in-memory log queue to the Datadog API

        Logs are sent to /api/v2/logs

        DD Ref: https://docs.datadoghq.com/api/latest/logs/

        Raises:
            Nothing - errors are logged as non-blocking via verbose_logger.exception
        """
        try:
            if not self.log_queue:
                verbose_logger.debug("Datadog: log_queue is empty; nothing to send")
                return

            verbose_logger.debug(
                "Datadog - about to flush %s events on %s",
                len(self.log_queue),
                self.intake_url,
            )

            response = await self.async_send_compressed_data(self.log_queue)
            if response.status_code == 413:
                verbose_logger.exception(DD_ERRORS.DATADOG_413_ERROR.value)
                return

            response.raise_for_status()
            if response.status_code != 202:
                raise Exception(
                    f"Response from datadog API status_code: {response.status_code}, text: {response.text}"
                )

            verbose_logger.debug(
                "Datadog: Response from datadog API status_code: %s, text: %s",
                response.status_code,
                response.text,
            )
        except Exception as e:
            verbose_logger.exception(
                f"Datadog Error sending batch API - {str(e)}\n{traceback.format_exc()}"
            )

    def log_success_event(self, kwargs, response_obj, start_time, end_time):
        """
        Sync log success events to Datadog

        - Creates a Datadog payload
        - Sends it immediately to the DD API (no batching)
        """
        try:
            if litellm.datadog_use_v1 is True:
                dd_payload = self._create_v0_logging_payload(
                    kwargs=kwargs,
                    response_obj=response_obj,
                    start_time=start_time,
                    end_time=end_time,
                )
            else:
                dd_payload = self.create_datadog_logging_payload(
                    kwargs=kwargs,
                    response_obj=response_obj,
                    start_time=start_time,
                    end_time=end_time,
                )

            response = self.sync_client.post(
                url=self.intake_url,
                json=dd_payload,  # type: ignore
                headers={
                    "DD-API-KEY": self.DD_API_KEY,
                },
            )

            response.raise_for_status()
            if response.status_code != 202:
                raise Exception(
                    f"Response from datadog API status_code: {response.status_code}, text: {response.text}"
                )

            verbose_logger.debug(
                "Datadog: Response from datadog API status_code: %s, text: %s",
                response.status_code,
                response.text,
            )

        except Exception as e:
            verbose_logger.exception(
                f"Datadog Layer Error - {str(e)}\n{traceback.format_exc()}"
            )

    async def _log_async_event(self, kwargs, response_obj, start_time, end_time):
        dd_payload = self.create_datadog_logging_payload(
            kwargs=kwargs,
            response_obj=response_obj,
            start_time=start_time,
            end_time=end_time,
        )

        self.log_queue.append(dd_payload)
        verbose_logger.debug(
            f"Datadog, event added to queue. Will flush in {self.flush_interval} seconds..."
        )

        if len(self.log_queue) >= self.batch_size:
            # flush via flush_queue (from CustomBatchLogger) so the queue is cleared
            # under the flush lock after a send, avoiding duplicate re-sends
            await self.flush_queue()

    def _create_datadog_logging_payload_helper(
        self,
        standard_logging_object: StandardLoggingPayload,
        status: DataDogStatus,
    ) -> DatadogPayload:
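        """Serialize a StandardLoggingPayload to JSON and wrap it in a DatadogPayload."""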
        json_payload = json.dumps(standard_logging_object, default=str)
        verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)
        dd_payload = DatadogPayload(
            ddsource=self._get_datadog_source(),
            ddtags=self._get_datadog_tags(
                standard_logging_object=standard_logging_object
            ),
            hostname=self._get_datadog_hostname(),
            message=json_payload,
            service=self._get_datadog_service(),
            status=status,
        )
        return dd_payload

    def create_datadog_logging_payload(
        self,
        kwargs: Union[dict, Any],
        response_obj: Any,
        start_time: datetime.datetime,
        end_time: datetime.datetime,
    ) -> DatadogPayload:
        """
        Helper function to create a datadog payload for logging

        Args:
            kwargs (Union[dict, Any]): request kwargs
            response_obj (Any): llm api response
            start_time (datetime.datetime): start time of request
            end_time (datetime.datetime): end time of request

        Returns:
            DatadogPayload: defined in types.py
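
        Example (illustrative shape of the result; actual values depend on env config):
            DatadogPayload(
                ddsource="litellm",
                ddtags="env:unknown,service:litellm,version:unknown,...",
                hostname="",
                message="<JSON-serialized StandardLoggingPayload>",
                service="litellm-server",
                status="info",
            )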
        """

        standard_logging_object: Optional[StandardLoggingPayload] = kwargs.get(
            "standard_logging_object", None
        )
        if standard_logging_object is None:
            raise ValueError("standard_logging_object not found in kwargs")

        status = DataDogStatus.INFO
        if standard_logging_object.get("status") == "failure":
            status = DataDogStatus.ERROR

        # Build the initial payload
        self.truncate_standard_logging_payload_content(standard_logging_object)

        dd_payload = self._create_datadog_logging_payload_helper(
            standard_logging_object=standard_logging_object,
            status=status,
        )
        return dd_payload

    async def async_send_compressed_data(self, data: List) -> Response:
        """
        Async helper to send compressed data to datadog self.intake_url

        Datadog recommends using gzip to compress data
        https://docs.datadoghq.com/api/latest/logs/

        "Datadog recommends sending your logs compressed. Add the Content-Encoding: gzip header to the request when sending"
        """

        import gzip

        compressed_data = gzip.compress(json.dumps(data, default=str).encode("utf-8"))
        response = await self.async_client.post(
            url=self.intake_url,
            data=compressed_data,  # type: ignore
            headers={
                "DD-API-KEY": self.DD_API_KEY,
                "Content-Encoding": "gzip",
                "Content-Type": "application/json",
            },
        )
        return response

    async def async_service_failure_hook(
        self,
        payload: ServiceLoggerPayload,
        error: Optional[str] = "",
        parent_otel_span: Optional[Any] = None,
        start_time: Optional[Union[datetimeObj, float]] = None,
        end_time: Optional[Union[float, datetimeObj]] = None,
        event_metadata: Optional[dict] = None,
    ):
        """
        Logs failures from adjacent systems (Redis, Postgres) as 'WARNING' on Datadog

        - example - Redis is failing / erroring; this will be logged on Datadog
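        - The payload is appended to the batch queue and sent on the next flush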
        """

        try:
            _payload_dict = payload.model_dump()
            _dd_message_str = json.dumps(_payload_dict, default=str)
            _dd_payload = DatadogPayload(
                ddsource="litellm",
                ddtags="",
                hostname="",
                message=_dd_message_str,
                service="litellm-server",
                status=DataDogStatus.WARN,
            )

            self.log_queue.append(_dd_payload)

        except Exception as e:
            verbose_logger.exception(
                f"Datadog: Logger - Exception in async_service_failure_hook: {e}"
            )

    async def async_service_success_hook(
        self,
        payload: ServiceLoggerPayload,
        error: Optional[str] = "",
        parent_otel_span: Optional[Any] = None,
        start_time: Optional[Union[datetimeObj, float]] = None,
        end_time: Optional[Union[float, datetimeObj]] = None,
        event_metadata: Optional[dict] = None,
    ):
        """
        Logs successes from adjacent systems (Redis, Postgres) as 'INFO' on Datadog

        No user has asked for this so far; it might be spammy on Datadog. If the need arises we can implement it.
        """
        return

    def _create_v0_logging_payload(
        self,
        kwargs: Union[dict, Any],
        response_obj: Any,
        start_time: datetime.datetime,
        end_time: datetime.datetime,
    ) -> DatadogPayload:
        """
        Note: This is the legacy (v0) version of the Datadog logging payload

        (Not recommended) To log with this format, set `litellm.datadog_use_v1 = True`
        """

        litellm_params = kwargs.get("litellm_params", {})
        metadata = (
            litellm_params.get("metadata", {}) or {}
        )  # handle litellm_params["metadata"] being None
        messages = kwargs.get("messages")
        optional_params = kwargs.get("optional_params", {})
        call_type = kwargs.get("call_type", "litellm.completion")
        cache_hit = kwargs.get("cache_hit", False)
        usage = dict(response_obj["usage"])
        response_id = response_obj.get("id", str(uuid.uuid4()))
        try:
            response_time = (end_time - start_time).total_seconds() * 1000
        except Exception:
            response_time = None

        try:
            response_obj = dict(response_obj)
        except Exception:
            pass  # leave response_obj as-is if it cannot be converted to a dict

        # Clean Metadata before logging - never log raw metadata
        # the raw metadata can contain circular references which leads to infinite recursion
        # we clean out all extra litellm metadata params before logging
        clean_metadata = {}
        if isinstance(metadata, dict):
            for key, value in metadata.items():
                # clean litellm metadata before logging
                if key in [
                    "endpoint",
                    "caching_groups",
                    "previous_models",
                ]:
                    continue
                else:
                    clean_metadata[key] = value

        # Build the initial payload
        payload = {
            "id": id,
            "call_type": call_type,
            "cache_hit": cache_hit,
            "start_time": start_time,
            "end_time": end_time,
            "response_time": response_time,
            "model": kwargs.get("model", ""),
            "user": kwargs.get("user", ""),
            "model_parameters": optional_params,
            "spend": kwargs.get("response_cost", 0),
            "messages": messages,
            "response": response_obj,
            "usage": usage,
            "metadata": clean_metadata,
        }

        json_payload = json.dumps(payload, default=str)

        verbose_logger.debug("Datadog: Logger - Logging payload = %s", json_payload)

        dd_payload = DatadogPayload(
            ddsource=self._get_datadog_source(),
            ddtags=self._get_datadog_tags(),
            hostname=self._get_datadog_hostname(),
            message=json_payload,
            service=self._get_datadog_service(),
            status=DataDogStatus.INFO,
        )
        return dd_payload

    @staticmethod
    def _get_datadog_tags(
        standard_logging_object: Optional[StandardLoggingPayload] = None,
    ) -> str:
        """
        Get the datadog tags for the request

        DD tags need to be as follows:
            - tags: ["user_handle:[email protected]", "app_version:1.0.0"]
        """
        base_tags = {
            "env": os.getenv("DD_ENV", "unknown"),
            "service": os.getenv("DD_SERVICE", "litellm"),
            "version": os.getenv("DD_VERSION", "unknown"),
            "HOSTNAME": DataDogLogger._get_datadog_hostname(),
            "POD_NAME": os.getenv("POD_NAME", "unknown"),
        }

        tags = [f"{k}:{v}" for k, v in base_tags.items()]

        if standard_logging_object:
            _request_tags: List[str] = (
                standard_logging_object.get("request_tags", []) or []
            )
            request_tags = [f"request_tag:{tag}" for tag in _request_tags]
            tags.extend(request_tags)

        return ",".join(tags)

    @staticmethod
    def _get_datadog_source():
        return os.getenv("DD_SOURCE", "litellm")

    @staticmethod
    def _get_datadog_service():
        return os.getenv("DD_SERVICE", "litellm-server")

    @staticmethod
    def _get_datadog_hostname():
        return os.getenv("HOSTNAME", "")

    @staticmethod
    def _get_datadog_env():
        return os.getenv("DD_ENV", "unknown")

    @staticmethod
    def _get_datadog_pod_name():
        return os.getenv("POD_NAME", "unknown")

    async def async_health_check(self) -> IntegrationHealthCheckStatus:
        """
        Check if the service is healthy
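
        Sends a single dummy log line to the intake URL and reports the result.

        Example (illustrative):
            status = await logger.async_health_check()
            # -> IntegrationHealthCheckStatus(status="healthy", error_message=None) on success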
        """
        from litellm.litellm_core_utils.litellm_logging import (
            create_dummy_standard_logging_payload,
        )

        standard_logging_object = create_dummy_standard_logging_payload()
        dd_payload = self._create_datadog_logging_payload_helper(
            standard_logging_object=standard_logging_object,
            status=DataDogStatus.INFO,
        )
        log_queue = [dd_payload]
        response = await self.async_send_compressed_data(log_queue)
        try:
            response.raise_for_status()
            return IntegrationHealthCheckStatus(
                status="healthy",
                error_message=None,
            )
        except httpx.HTTPStatusError as e:
            return IntegrationHealthCheckStatus(
                status="unhealthy",
                error_message=e.response.text,
            )
        except Exception as e:
            return IntegrationHealthCheckStatus(
                status="unhealthy",
                error_message=str(e),
            )