|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
import time |
|
import pyarrow as pa |
|
|
|
|
|
class HighLatencyReader(object):
    """File-like reader wrapper that sleeps before every read.

    Each ``read`` call first blocks for ``latency`` seconds (through
    ``time.sleep``) and then delegates to the wrapped ``raw`` object.
    ``close`` and ``closed`` are forwarded to ``raw`` unchanged.
    """

    def __init__(self, raw, latency):
        # raw: the wrapped readable object.
        # latency: value handed to time.sleep() ahead of each read.
        self.latency = latency
        self.raw = raw

    @property
    def closed(self):
        """Report the ``closed`` state of the wrapped object."""
        return self.raw.closed

    def close(self):
        """Close the wrapped object."""
        self.raw.close()

    def read(self, nbytes=None):
        """Sleep for the configured latency, then read from ``raw``.

        ``nbytes`` is passed straight through to ``raw.read`` and the
        result of that call is returned as-is.
        """
        delay = self.latency
        time.sleep(delay)
        data = self.raw.read(nbytes)
        return data
|
|
|
|
|
class HighLatencyWriter(object):
    """File-like writer wrapper that sleeps before every write.

    Each ``write`` call first blocks for ``latency`` seconds (through
    ``time.sleep``) and then delegates to the wrapped ``raw`` object.
    ``close`` and ``closed`` are forwarded to ``raw`` unchanged.
    """

    def __init__(self, raw, latency):
        # raw: the wrapped writable object.
        # latency: value handed to time.sleep() ahead of each write.
        self.raw = raw
        self.latency = latency

    def close(self):
        """Close the wrapped object."""
        self.raw.close()

    @property
    def closed(self):
        """Report the ``closed`` state of the wrapped object."""
        return self.raw.closed

    def write(self, data):
        """Sleep for the configured latency, then write ``data`` to ``raw``.

        Returns whatever ``raw.write(data)`` returns, so a wrapped object
        that reports the number of bytes written keeps doing so through
        this wrapper instead of having the count silently dropped.
        """
        time.sleep(self.latency)
        return self.raw.write(data)
|
|
|
|
|
class BufferedIOHighLatency(object):
    """Benchmark buffered pyarrow streams over slow Python file objects.

    Both benchmarks move ``total_size`` bytes in ``increment``-sized
    calls through a stream created with ``buffer_size`` bytes of
    buffering, layered on a wrapper that sleeps ``latency`` seconds on
    every underlying read or write.

    NOTE(review): ``param_names`` / ``params`` and the ``time_`` method
    prefix look like benchmark-runner (asv-style) conventions; confirm
    against the harness that collects this class.
    """

    # Bytes transferred per individual read()/write() call.
    increment = 1024
    # Total bytes transferred by each benchmark (16 MiB).
    total_size = (1 << 20) * 16
    # Buffer size passed to pa.output_stream / pa.input_stream (1 MiB).
    buffer_size = 1 << 20
    # Not read by the methods below; they use their ``latency`` argument.
    latency = 0.1

    param_names = ('latency',)
    params = [0, 0.01, 0.1]

    def time_buffered_writes(self, latency):
        """Write ``total_size`` bytes through a buffered slow writer."""
        sink = HighLatencyWriter(pa.BufferOutputStream(), latency)
        stream = pa.output_stream(sink, buffer_size=self.buffer_size)
        payload = b'x' * self.increment

        # Count down instead of up: same number of write() calls.
        remaining = self.total_size
        while remaining > 0:
            stream.write(payload)
            remaining -= self.increment
        # Push whatever is still sitting in the buffer to the slow sink.
        stream.flush()

    def time_buffered_reads(self, latency):
        """Read ``total_size`` bytes through a buffered slow reader."""
        source = pa.input_stream(pa.py_buffer(b'x' * self.total_size))
        slow_source = HighLatencyReader(source, latency)
        stream = pa.input_stream(slow_source, buffer_size=self.buffer_size)

        # Count down instead of up: same number of read() calls.
        remaining = self.total_size
        while remaining > 0:
            stream.read(self.increment)
            remaining -= self.increment
|
|