Skip to content

Commit 9e92f6b

Browse files
Pijukatel and vdusek authored
fix: Use perf_counter_ns for request duration tracking (#1260)
### Description

Replace `datetime.now` with `time.perf_counter_ns` in the time-difference calculations used for request duration tracking, to avoid possible issues caused by insufficient timer resolution. This fixes the flakiness of `test_final_statistics`, which was happening on Windows with Python versions < 3.13.

### Issues

- Closes: #1256

---------

Co-authored-by: Vlada Dusek <[email protected]>
1 parent ad66c82 commit 9e92f6b

File tree

2 files changed

+22
-4
lines changed

2 files changed

+22
-4
lines changed

src/crawlee/statistics/_statistics.py

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from __future__ import annotations
33

44
import math
5+
import time
56
from datetime import datetime, timedelta, timezone
67
from logging import Logger, getLogger
78
from typing import TYPE_CHECKING, Generic, Literal
@@ -27,22 +28,22 @@ class RequestProcessingRecord:
2728
"""Tracks information about the processing of a request."""
2829

2930
def __init__(self) -> None:
30-
self._last_run_at: datetime | None = None
31+
self._last_run_at_ns: int | None = None
3132
self._runs = 0
3233
self.duration: timedelta | None = None
3334

3435
def run(self) -> int:
3536
"""Mark the job as started."""
36-
self._last_run_at = datetime.now(timezone.utc)
37+
self._last_run_at_ns = time.perf_counter_ns()
3738
self._runs += 1
3839
return self._runs
3940

4041
def finish(self) -> timedelta:
4142
"""Mark the job as finished."""
42-
if self._last_run_at is None:
43+
if self._last_run_at_ns is None:
4344
raise RuntimeError('Invalid state')
4445

45-
self.duration = datetime.now(timezone.utc) - self._last_run_at
46+
self.duration = timedelta(microseconds=math.ceil((time.perf_counter_ns() - self._last_run_at_ns) / 1000))
4647
return self.duration
4748

4849
@property
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
from datetime import timedelta
2+
3+
from crawlee.statistics._statistics import RequestProcessingRecord
4+
5+
6+
def test_tracking_time_resolution() -> None:
7+
"""Test that `RequestProcessingRecord` tracks time with sufficient resolution.
8+
9+
This is generally not an issue on Linux, but on Windows some packages in older Python versions might be using system
10+
timers with not so granular resolution - some sources estimate 15ms. This test will start failing on Windows
11+
if unsuitable source of time measurement is selected due to two successive time measurements possibly using same
12+
timing sample."""
13+
record = RequestProcessingRecord()
14+
record.run()
15+
record.finish()
16+
assert record.duration
17+
assert record.duration > timedelta(seconds=0)

0 commit comments

Comments
 (0)