# Llama-3.1-8B-DALv0.1/venv/lib/python3.12/site-packages/prometheus_client/openmetrics/parser.py
#!/usr/bin/env python | |
import io as StringIO | |
import math | |
import re | |
from ..metrics_core import Metric, METRIC_LABEL_NAME_RE | |
from ..samples import Exemplar, Sample, Timestamp | |
from ..utils import floatToGoString | |
def text_string_to_metric_families(text):
    """Parse OpenMetrics text format from a unicode string.

    The string is wrapped in an in-memory text stream and handed to
    text_fd_to_metric_families; see that function for the parsing rules.

    Yields whatever text_fd_to_metric_families yields for the stream.
    """
    yield from text_fd_to_metric_families(StringIO.StringIO(text))
_CANONICAL_NUMBERS = {float("inf")} | |
def _isUncanonicalNumber(s): | |
f = float(s) | |
if f not in _CANONICAL_NUMBERS: | |
return False # Only the canonical numbers are required to be canonical. | |
return s != floatToGoString(f) | |
ESCAPE_SEQUENCES = { | |
'\\\\': '\\', | |
'\\n': '\n', | |
'\\"': '"', | |
} | |
def _replace_escape_sequence(match): | |
return ESCAPE_SEQUENCES[match.group(0)] | |
ESCAPING_RE = re.compile(r'\\[\\n"]') | |
def _replace_escaping(s): | |
return ESCAPING_RE.sub(_replace_escape_sequence, s) | |
def _unescape_help(text): | |
result = [] | |
slash = False | |
for char in text: | |
if slash: | |
if char == '\\': | |
result.append('\\') | |
elif char == '"': | |
result.append('"') | |
elif char == 'n': | |
result.append('\n') | |
else: | |
result.append('\\' + char) | |
slash = False | |
else: | |
if char == '\\': | |
slash = True | |
else: | |
result.append(char) | |
if slash: | |
result.append('\\') | |
return ''.join(result) | |
def _parse_value(value): | |
value = ''.join(value) | |
if value != value.strip() or '_' in value: | |
raise ValueError(f"Invalid value: {value!r}") | |
try: | |
return int(value) | |
except ValueError: | |
return float(value) | |
def _parse_timestamp(timestamp): | |
timestamp = ''.join(timestamp) | |
if not timestamp: | |
return None | |
if timestamp != timestamp.strip() or '_' in timestamp: | |
raise ValueError(f"Invalid timestamp: {timestamp!r}") | |
try: | |
# Simple int. | |
return Timestamp(int(timestamp), 0) | |
except ValueError: | |
try: | |
# aaaa.bbbb. Nanosecond resolution supported. | |
parts = timestamp.split('.', 1) | |
return Timestamp(int(parts[0]), int(parts[1][:9].ljust(9, "0"))) | |
except ValueError: | |
# Float. | |
ts = float(timestamp) | |
if math.isnan(ts) or math.isinf(ts): | |
raise ValueError(f"Invalid timestamp: {timestamp!r}") | |
return ts | |
def _is_character_escaped(s, charpos): | |
num_bslashes = 0 | |
while (charpos > num_bslashes | |
and s[charpos - 1 - num_bslashes] == '\\'): | |
num_bslashes += 1 | |
return num_bslashes % 2 == 1 | |
def _parse_labels_with_state_machine(text): | |
# The { has already been parsed. | |
state = 'startoflabelname' | |
labelname = [] | |
labelvalue = [] | |
labels = {} | |
labels_len = 0 | |
for char in text: | |
if state == 'startoflabelname': | |
if char == '}': | |
state = 'endoflabels' | |
else: | |
state = 'labelname' | |
labelname.append(char) | |
elif state == 'labelname': | |
if char == '=': | |
state = 'labelvaluequote' | |
else: | |
labelname.append(char) | |
elif state == 'labelvaluequote': | |
if char == '"': | |
state = 'labelvalue' | |
else: | |
raise ValueError("Invalid line: " + text) | |
elif state == 'labelvalue': | |
if char == '\\': | |
state = 'labelvalueslash' | |
elif char == '"': | |
ln = ''.join(labelname) | |
if not METRIC_LABEL_NAME_RE.match(ln): | |
raise ValueError("Invalid line, bad label name: " + text) | |
if ln in labels: | |
raise ValueError("Invalid line, duplicate label name: " + text) | |
labels[ln] = ''.join(labelvalue) | |
labelname = [] | |
labelvalue = [] | |
state = 'endoflabelvalue' | |
else: | |
labelvalue.append(char) | |
elif state == 'endoflabelvalue': | |
if char == ',': | |
state = 'labelname' | |
elif char == '}': | |
state = 'endoflabels' | |
else: | |
raise ValueError("Invalid line: " + text) | |
elif state == 'labelvalueslash': | |
state = 'labelvalue' | |
if char == '\\': | |
labelvalue.append('\\') | |
elif char == 'n': | |
labelvalue.append('\n') | |
elif char == '"': | |
labelvalue.append('"') | |
else: | |
labelvalue.append('\\' + char) | |
elif state == 'endoflabels': | |
if char == ' ': | |
break | |
else: | |
raise ValueError("Invalid line: " + text) | |
labels_len += 1 | |
return labels, labels_len | |
def _parse_labels(text): | |
labels = {} | |
# Raise error if we don't have valid labels | |
if text and "=" not in text: | |
raise ValueError | |
# Copy original labels | |
sub_labels = text | |
try: | |
# Process one label at a time | |
while sub_labels: | |
# The label name is before the equal | |
value_start = sub_labels.index("=") | |
label_name = sub_labels[:value_start] | |
sub_labels = sub_labels[value_start + 1:] | |
# Check for missing quotes | |
if not sub_labels or sub_labels[0] != '"': | |
raise ValueError | |
# The first quote is guaranteed to be after the equal | |
value_substr = sub_labels[1:] | |
# Check for extra commas | |
if not label_name or label_name[0] == ',': | |
raise ValueError | |
if not value_substr or value_substr[-1] == ',': | |
raise ValueError | |
# Find the last unescaped quote | |
i = 0 | |
while i < len(value_substr): | |
i = value_substr.index('"', i) | |
if not _is_character_escaped(value_substr[:i], i): | |
break | |
i += 1 | |
# The label value is between the first and last quote | |
quote_end = i + 1 | |
label_value = sub_labels[1:quote_end] | |
# Replace escaping if needed | |
if "\\" in label_value: | |
label_value = _replace_escaping(label_value) | |
if not METRIC_LABEL_NAME_RE.match(label_name): | |
raise ValueError("invalid line, bad label name: " + text) | |
if label_name in labels: | |
raise ValueError("invalid line, duplicate label name: " + text) | |
labels[label_name] = label_value | |
# Remove the processed label from the sub-slice for next iteration | |
sub_labels = sub_labels[quote_end + 1:] | |
if sub_labels.startswith(","): | |
next_comma = 1 | |
else: | |
next_comma = 0 | |
sub_labels = sub_labels[next_comma:] | |
# Check for missing commas | |
if sub_labels and next_comma == 0: | |
raise ValueError | |
return labels | |
except ValueError: | |
raise ValueError("Invalid labels: " + text) | |
def _parse_sample(text):
    """Parse one sample line into a Sample.

    ``text`` is a single line without its trailing newline, of the form
    ``name[{labels}] value [timestamp] [# exemplar]``.

    Returns ``Sample(name, labels, value, timestamp, exemplar)``.

    Raises ValueError if the line cannot be split into those parts or any
    part fails to parse.
    """
    separator = " # "
    # Detect the labels in the text
    label_start = text.find("{")
    if label_start == -1 or separator in text[:label_start]:
        # We don't have labels, but there could be an exemplar.
        # (A "{" found after the separator belongs to the exemplar.)
        name_end = text.index(" ")
        name = text[:name_end]
        # Parse the remaining text after the name
        remaining_text = text[name_end + 1:]
        value, timestamp, exemplar = _parse_remaining_text(remaining_text)
        return Sample(name, {}, value, timestamp, exemplar)

    # The name is before the labels
    name = text[:label_start]
    if separator not in text:
        # Line doesn't contain an exemplar
        # We can use `rindex` to find `label_end`
        label_end = text.rindex("}")
        label = text[label_start + 1:label_end]
        labels = _parse_labels(label)
        # The character after "}" is skipped below, so it must be the
        # separating space; otherwise part of the value would be dropped.
        if text[label_end + 1:label_end + 2] != " ":
            raise ValueError("Invalid line: " + text)
    else:
        # Line potentially contains an exemplar
        # Fallback to parsing labels with a state machine
        labels, labels_len = _parse_labels_with_state_machine(text[label_start + 1:])
        # labels_len counts from just after "{" through "}", so this is the
        # index of the closing brace in ``text``.
        label_end = labels_len + len(name)
    # Parsing labels succeeded, continue parsing the remaining text
    remaining_text = text[label_end + 2:]
    value, timestamp, exemplar = _parse_remaining_text(remaining_text)
    return Sample(name, labels, value, timestamp, exemplar)
def _parse_remaining_text(text):
    """Parse what follows the metric name and labels on a sample line.

    ``text`` has the form ``value [timestamp] [# {labels} value [timestamp]]``
    where the part after ``#`` is an exemplar.

    Returns ``(value, timestamp, exemplar)``; ``timestamp`` and ``exemplar``
    are None when absent.

    Raises ValueError on a malformed line, a trailing space, an incomplete
    exemplar, or exemplar labels whose combined name and value length
    exceeds 128 characters.
    """
    split_text = text.split(" ", 1)
    val = _parse_value(split_text[0])
    if len(split_text) == 1:
        # We don't have timestamp or exemplar
        return val, None, None

    timestamp = []
    exemplar_value = []
    exemplar_timestamp = []
    exemplar_labels = None
    label_end = -1  # Index of the "}" closing the exemplar labels, once known.

    state = 'timestamp'
    text = split_text[1]

    for pos, char in enumerate(text):
        if state == 'timestamp':
            if char == '#' and not timestamp:
                state = 'exemplarspace'
            elif char == ' ':
                state = 'exemplarhash'
            else:
                timestamp.append(char)
        elif state == 'exemplarhash':
            if char == '#':
                state = 'exemplarspace'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarspace':
            if char == ' ':
                state = 'exemplarstartoflabels'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarstartoflabels':
            if char == '{':
                # The labels run from this brace to the last "}" in the text;
                # they are parsed in one go rather than per character.
                label_start, label_end = pos, text.rindex("}")
                exemplar_labels = _parse_labels(text[label_start + 1:label_end])
                state = 'exemplarparsedlabels'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarparsedlabels':
            # Skip the already-parsed labels. Compare positions, not
            # characters, so a "}" inside a label value does not end them.
            if pos == label_end:
                state = 'exemplarvaluespace'
        elif state == 'exemplarvaluespace':
            if char == ' ':
                state = 'exemplarvalue'
            else:
                raise ValueError("Invalid line: " + text)
        elif state == 'exemplarvalue':
            if char == ' ' and not exemplar_value:
                raise ValueError("Invalid line: " + text)
            elif char == ' ':
                state = 'exemplartimestamp'
            else:
                exemplar_value.append(char)
        elif state == 'exemplartimestamp':
            exemplar_timestamp.append(char)

    # Trailing space after value.
    if state == 'timestamp' and not timestamp:
        raise ValueError("Invalid line: " + text)

    # Trailing space after value.
    if state == 'exemplartimestamp' and not exemplar_timestamp:
        raise ValueError("Invalid line: " + text)

    # Incomplete exemplar.
    if state in ['exemplarhash', 'exemplarspace', 'exemplarstartoflabels', 'exemplarparsedlabels']:
        raise ValueError("Invalid line: " + text)

    ts = _parse_timestamp(timestamp)
    exemplar = None
    if exemplar_labels is not None:
        exemplar_length = sum(len(k) + len(v) for k, v in exemplar_labels.items())
        if exemplar_length > 128:
            raise ValueError("Exemplar labels are too long: " + text)
        exemplar = Exemplar(
            exemplar_labels,
            _parse_value(exemplar_value),
            _parse_timestamp(exemplar_timestamp),
        )

    return val, ts, exemplar
def _group_for_sample(sample, name, typ): | |
if typ == 'info': | |
# We can't distinguish between groups for info metrics. | |
return {} | |
if typ == 'summary' and sample.name == name: | |
d = sample.labels.copy() | |
del d['quantile'] | |
return d | |
if typ == 'stateset': | |
d = sample.labels.copy() | |
del d[name] | |
return d | |
if typ in ['histogram', 'gaugehistogram'] and sample.name == name + '_bucket': | |
d = sample.labels.copy() | |
del d['le'] | |
return d | |
return sample.labels | |
def _check_histogram(samples, name): | |
group = None | |
timestamp = None | |
def do_checks(): | |
if bucket != float('+Inf'): | |
raise ValueError("+Inf bucket missing: " + name) | |
if count is not None and value != count: | |
raise ValueError("Count does not match +Inf value: " + name) | |
if has_sum and count is None: | |
raise ValueError("_count must be present if _sum is present: " + name) | |
if has_gsum and count is None: | |
raise ValueError("_gcount must be present if _gsum is present: " + name) | |
if not (has_sum or has_gsum) and count is not None: | |
raise ValueError("_sum/_gsum must be present if _count is present: " + name) | |
if has_negative_buckets and has_sum: | |
raise ValueError("Cannot have _sum with negative buckets: " + name) | |
if not has_negative_buckets and has_negative_gsum: | |
raise ValueError("Cannot have negative _gsum with non-negative buckets: " + name) | |
for s in samples: | |
suffix = s.name[len(name):] | |
g = _group_for_sample(s, name, 'histogram') | |
if g != group or s.timestamp != timestamp: | |
if group is not None: | |
do_checks() | |
count = None | |
bucket = None | |
has_negative_buckets = False | |
has_sum = False | |
has_gsum = False | |
has_negative_gsum = False | |
value = 0 | |
group = g | |
timestamp = s.timestamp | |
if suffix == '_bucket': | |
b = float(s.labels['le']) | |
if b < 0: | |
has_negative_buckets = True | |
if bucket is not None and b <= bucket: | |
raise ValueError("Buckets out of order: " + name) | |
if s.value < value: | |
raise ValueError("Bucket values out of order: " + name) | |
bucket = b | |
value = s.value | |
elif suffix in ['_count', '_gcount']: | |
count = s.value | |
elif suffix in ['_sum']: | |
has_sum = True | |
elif suffix in ['_gsum']: | |
has_gsum = True | |
if s.value < 0: | |
has_negative_gsum = True | |
if group is not None: | |
do_checks() | |
def text_fd_to_metric_families(fd):
    """Parse OpenMetrics text format from a file-like object.

    This is a laxer parser than the main Go parser,
    so successful parsing does not imply that the parsed
    text meets the specification.

    ``fd`` is iterated line by line; each item is a text line, with or
    without its trailing newline.

    Yields Metric's.

    Raises ValueError on malformed input, including a blank line, content
    after ``# EOF``, or a missing ``# EOF`` at the end.
    """
    name = None
    allowed_names = []
    eof = False

    seen_names = set()
    # Sample-name suffixes each metric type may use.
    type_suffixes = {
        'counter': ['_total', '_created'],
        'summary': ['', '_count', '_sum', '_created'],
        'histogram': ['_count', '_sum', '_bucket', '_created'],
        'gaugehistogram': ['_gcount', '_gsum', '_bucket'],
        'info': ['_info'],
    }

    def build_metric(name, documentation, typ, unit, samples):
        """Validate one finished metric family and return it as a Metric."""
        if typ is None:
            typ = 'unknown'
        # Reserve the family name and every suffixed name its type may use.
        for suffix in set(type_suffixes.get(typ, []) + [""]):
            if name + suffix in seen_names:
                raise ValueError("Clashing name: " + name + suffix)
            seen_names.add(name + suffix)
        if documentation is None:
            documentation = ''
        if unit is None:
            unit = ''
        if unit and not name.endswith("_" + unit):
            raise ValueError("Unit does not match metric name: " + name)
        if unit and typ in ['info', 'stateset']:
            raise ValueError("Units not allowed for this metric type: " + name)
        if typ in ['histogram', 'gaugehistogram']:
            _check_histogram(samples, name)
        metric = Metric(name, documentation, typ, unit)
        # TODO: check labelvalues are valid utf8
        metric.samples = samples
        return metric

    for line in fd:
        # endswith() also copes with an empty string from a non-file iterable.
        if line.endswith('\n'):
            line = line[:-1]

        if eof:
            raise ValueError("Received line after # EOF: " + line)

        if not line:
            raise ValueError("Received blank line")

        if line == '# EOF':
            eof = True
        elif line.startswith('#'):
            # Metadata line: "# <HELP|TYPE|UNIT> <name> <rest>".
            parts = line.split(' ', 3)
            if len(parts) < 4:
                raise ValueError("Invalid line: " + line)
            if parts[2] == name and samples:
                raise ValueError("Received metadata after samples: " + line)
            if parts[2] != name:
                if name is not None:
                    yield build_metric(name, documentation, typ, unit, samples)
                # New metric
                name = parts[2]
                unit = None
                typ = None
                documentation = None
                group = None
                seen_groups = set()
                group_timestamp = None
                group_timestamp_samples = set()
                samples = []
                allowed_names = [parts[2]]

            if parts[1] == 'HELP':
                if documentation is not None:
                    raise ValueError("More than one HELP for metric: " + line)
                documentation = _unescape_help(parts[3])
            elif parts[1] == 'TYPE':
                if typ is not None:
                    raise ValueError("More than one TYPE for metric: " + line)
                typ = parts[3]
                if typ == 'untyped':
                    raise ValueError("Invalid TYPE for metric: " + line)
                allowed_names = [name + n for n in type_suffixes.get(typ, [''])]
            elif parts[1] == 'UNIT':
                if unit is not None:
                    raise ValueError("More than one UNIT for metric: " + line)
                unit = parts[3]
            else:
                raise ValueError("Invalid line: " + line)
        else:
            sample = _parse_sample(line)
            if sample.name not in allowed_names:
                if name is not None:
                    yield build_metric(name, documentation, typ, unit, samples)
                # Start an unknown metric.
                name = sample.name
                documentation = None
                unit = None
                typ = 'unknown'
                samples = []
                group = None
                group_timestamp = None
                group_timestamp_samples = set()
                seen_groups = set()
                allowed_names = [sample.name]

            if typ == 'stateset' and name not in sample.labels:
                raise ValueError("Stateset missing label: " + line)
            if (name + '_bucket' == sample.name
                    and (sample.labels.get('le', "NaN") == "NaN"
                         or _isUncanonicalNumber(sample.labels['le']))):
                raise ValueError("Invalid le label: " + line)
            if (name + '_bucket' == sample.name
                    and (not isinstance(sample.value, int) and not sample.value.is_integer())):
                raise ValueError("Bucket value must be an integer: " + line)
            if ((name + '_count' == sample.name or name + '_gcount' == sample.name)
                    and (not isinstance(sample.value, int) and not sample.value.is_integer())):
                raise ValueError("Count value must be an integer: " + line)
            if (typ == 'summary' and name == sample.name
                    and (not (0 <= float(sample.labels.get('quantile', -1)) <= 1)
                         or _isUncanonicalNumber(sample.labels['quantile']))):
                raise ValueError("Invalid quantile label: " + line)

            # Hashable form of the sample's group labels.
            g = tuple(sorted(_group_for_sample(sample, name, typ).items()))
            if group is not None and g != group and g in seen_groups:
                raise ValueError("Invalid metric grouping: " + line)
            if group is not None and g == group:
                if (sample.timestamp is None) != (group_timestamp is None):
                    raise ValueError("Mix of timestamp presence within a group: " + line)
                if group_timestamp is not None and group_timestamp > sample.timestamp and typ != 'info':
                    raise ValueError("Timestamps went backwards within a group: " + line)
            else:
                group_timestamp_samples = set()

            series_id = (sample.name, tuple(sorted(sample.labels.items())))
            if sample.timestamp != group_timestamp or series_id not in group_timestamp_samples:
                # Not a duplicate due to timestamp truncation.
                samples.append(sample)
            group_timestamp_samples.add(series_id)

            group = g
            group_timestamp = sample.timestamp
            seen_groups.add(g)

            if typ == 'stateset' and sample.value not in [0, 1]:
                raise ValueError("Stateset samples can only have values zero and one: " + line)
            if typ == 'info' and sample.value != 1:
                raise ValueError("Info samples can only have value one: " + line)
            if typ == 'summary' and name == sample.name and sample.value < 0:
                raise ValueError("Quantile values cannot be negative: " + line)
            if sample.name[len(name):] in ['_total', '_sum', '_count', '_bucket', '_gcount', '_gsum'] and math.isnan(
                    sample.value):
                raise ValueError("Counter-like samples cannot be NaN: " + line)
            if sample.name[len(name):] in ['_total', '_sum', '_count', '_bucket', '_gcount'] and sample.value < 0:
                raise ValueError("Counter-like samples cannot be negative: " + line)
            if sample.exemplar and not (
                    (typ in ['histogram', 'gaugehistogram'] and sample.name.endswith('_bucket'))
                    or (typ in ['counter'] and sample.name.endswith('_total'))):
                raise ValueError("Invalid line only histogram/gaugehistogram buckets and counters can have exemplars: " + line)

    # Flush the last metric family, which no following line closed.
    if name is not None:
        yield build_metric(name, documentation, typ, unit, samples)

    if not eof:
        raise ValueError("Missing # EOF at end")