Commit 38f0b50

Merge pull request #9706 from cvat-ai/release-2.43.0
Release v2.43.0
2 parents dd46f3d + 6ab646f

43 files changed: +1799 −187 lines

.github/workflows/main.yml

Lines changed: 3 additions & 2 deletions
@@ -516,8 +516,9 @@ jobs:
           LOGS_DIR: "${{ github.workspace }}/rest_api_testing"
         run: |
           mkdir ${LOGS_DIR}
-          kubectl logs $(kubectl get pods -l component=server -o 'jsonpath={.items[0].metadata.name}') >${LOGS_DIR}/cvat_server.log
-          kubectl logs $(kubectl get pods -l component=worker-utils -o 'jsonpath={.items[0].metadata.name}') >${LOGS_DIR}/cvat_workers.log
+          for backend_pod in $(kubectl get pods -l tier=backend -o 'jsonpath={.items[*].metadata.name}'); do
+            kubectl logs ${backend_pod} >${LOGS_DIR}/${backend_pod}.log
+          done
           kubectl logs $(kubectl get pods -l app.kubernetes.io/name=traefik -o 'jsonpath={.items[0].metadata.name}') >${LOGS_DIR}/traefik.log

       - name: Uploading "cvat" container logs as an artifact
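
For reference, the new log-collection loop can be reproduced by hand against a test cluster. This is a minimal sketch, assuming working `kubectl` access and the `tier=backend` label used by the CVAT test deployment; the log directory is a placeholder:

```
# Collect logs from every backend pod into one file per pod (sketch).
LOGS_DIR=/tmp/cvat_rest_api_testing
mkdir -p "${LOGS_DIR}"
for backend_pod in $(kubectl get pods -l tier=backend -o 'jsonpath={.items[*].metadata.name}'); do
    kubectl logs "${backend_pod}" > "${LOGS_DIR}/${backend_pod}.log"
done
```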

CHANGELOG.md

Lines changed: 24 additions & 0 deletions
@@ -16,6 +16,30 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

 <!-- scriv-insert-here -->

+<a id='changelog-2.43.0'></a>
+## \[2.43.0\] - 2025-08-07
+
+### Added
+
+- Django command to remove a user together with all their resources: `python manage.py deleteuser <user_id>`
+  (<https://github.com/cvat-ai/cvat/pull/9686>)
+
+### Changed
+
+- Better validation of fields specified in the raw labels editor
+  (<https://github.com/cvat-ai/cvat/pull/9677>)
+
+- Optimized preview requests for Projects, Tasks, Jobs, etc.: they are now sent sequentially to reduce load on the server
+  (<https://github.com/cvat-ai/cvat_enterprise/pull/9692>)
+
+### Fixed
+
+- Issue dialogs appearing outside the visible area when the issue is located near the right or bottom edge of the frame
+  (<https://github.com/cvat-ai/cvat/pull/9642>)
+
+- Job meta could include `deleted_frames` outside the job
+  (<https://github.com/cvat-ai/cvat/pull/9690>)
+
 <a id='changelog-2.42.0'></a>
 ## \[2.42.0\] - 2025-07-29
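
As an example of the new `deleteuser` command, a minimal sketch for a Docker Compose deployment follows; the container name and `manage.py` location are assumed to match the standard Compose setup, and the user ID `42` is only a placeholder:

```
# Hypothetical invocation; adjust the container name and user ID for your deployment.
docker exec -it cvat_server bash -ic 'python3 ~/manage.py deleteuser 42'
```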

ai-models/README.md

Lines changed: 11 additions & 0 deletions
@@ -0,0 +1,11 @@
+This directory contains various auto-annotation function implementations
+for use with the CVAT SDK and CLI.
+These functions use popular deep learning models to perform various computer vision tasks.
+
+Consult the [Auto-annotation API reference][apiref] for general information about AA functions.
+
+[apiref]: https://docs.cvat.ai/docs/api_sdk/sdk/auto-annotation/
+
+For information on each AA function, see the `README.md` file in its directory.
+Each function also has a `requirements.txt` file describing Python packages
+that you must install in order to use it.
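
As a concrete illustration of the installation step the README describes, setting up the dependencies for the SAM2 tracker added in this commit might look like the sketch below (run from the repository root; the virtual environment is optional and the paths are assumptions based on this commit's layout):

```
# Install the Python dependencies for one AA function (here, the SAM2 tracker).
python -m venv .venv && . .venv/bin/activate
pip install -r ai-models/tracker/sam2/requirements.txt
```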

ai-models/tracker/sam2/README.md

Lines changed: 23 additions & 0 deletions
@@ -0,0 +1,23 @@
+# SAM2 tracker
+
+This directory contains an implementation of a CVAT auto-annotation function
+that tracks masks and polygons using the [Segment Anything Model 2][sam2] (SAM2)
+from Meta Research.
+
+[sam2]: https://github.com/facebookresearch/sam2
+
+To use this with CVAT CLI, use the following options:
+
+```
+--function-file func.py -p model_id=str:<model_id>
+```
+
+where `<model_id>` is one of the [SAM2 model IDs][sam2-hf] from Meta's Hugging Face account,
+such as `facebook/sam2.1-hiera-tiny` or `facebook/sam2.1-hiera-large`.
+
+[sam2-hf]: https://huggingface.co/models?search=facebook%2Fsam2
+
+In addition, you can add `-p device=str:<device>` to run the model on a specific PyTorch device,
+such as `cuda`. By default, the model will be run on the CPU.
+
+All other parameters set with the `-p` option will be passed directly to the model constructor.
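
For example, a filled-in option set might look like the following sketch; the surrounding `cvat-cli` command is omitted, as in the README, and the model ID and device are just one possible choice:

```
--function-file func.py \
    -p model_id=str:facebook/sam2.1-hiera-large \
    -p device=str:cuda
```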

ai-models/tracker/sam2/func.py

Lines changed: 225 additions & 0 deletions
@@ -0,0 +1,225 @@
+# Copyright (C) CVAT.ai Corporation
+#
+# SPDX-License-Identifier: MIT
+
+import collections
+import dataclasses
+from typing import Optional, TypedDict
+
+import cv2
+import cvat_sdk.auto_annotation as cvataa
+import numpy as np
+import PIL.Image
+import torch
+import torchvision.transforms
+from cvat_sdk.masks import decode_mask, encode_mask
+from sam2.sam2_video_predictor import SAM2VideoPredictor
+from sam2.utils.misc import fill_holes_in_mask_scores
+
+
+@dataclasses.dataclass(frozen=True, kw_only=True)
+class _PreprocessedImage:
+    original_width: int
+    original_height: int
+    vision_feats: list[torch.Tensor]
+    vision_pos_embeds: list[torch.Tensor]
+    feat_sizes: list[tuple[int, int]]
+
+
+class _PredictorOutputs(TypedDict):
+    # We always keep 1 cond_frame_outputs and up to num_maskmem non_cond_frame_outputs.
+
+    cond_frame_outputs: dict[int, dict]
+    # We make this an OrderedDict to make popping old elements easier.
+    non_cond_frame_outputs: collections.OrderedDict[int, dict]
+
+
+@dataclasses.dataclass(kw_only=True)
+class _TrackingState:
+    frame_idx: int
+    predictor_outputs: _PredictorOutputs
+
+
+class _Sam2Tracker:
+    def __init__(self, model_id: str, device: str = "cpu", **kwargs) -> None:
+        self._device = torch.device(device)
+
+        if self._device.type == "cuda":
+            torch.set_autocast_enabled(True)
+            torch.set_autocast_gpu_dtype(torch.bfloat16)
+            if torch.cuda.get_device_properties(self._device).major >= 8:
+                torch.backends.cuda.matmul.allow_tf32 = True
+                torch.backends.cudnn.allow_tf32 = True
+
+        self._predictor = SAM2VideoPredictor.from_pretrained(
+            model_id, device=self._device, **kwargs
+        )
+        self._transform = torchvision.transforms.Compose(
+            [
+                torchvision.transforms.Resize(
+                    (self._predictor.image_size, self._predictor.image_size)
+                ),
+                torchvision.transforms.ToTensor(),
+                torchvision.transforms.Normalize(
+                    # see load_video_frames in the SAM2 source
+                    mean=(0.485, 0.456, 0.406),
+                    std=(0.229, 0.224, 0.225),
+                ),
+            ]
+        )
+
+    spec = cvataa.TrackingFunctionSpec(supported_shape_types=["mask", "polygon"])
+
+    @torch.inference_mode()
+    def preprocess_image(
+        self, context: cvataa.TrackingFunctionContext, image: PIL.Image.Image
+    ) -> _PreprocessedImage:
+        image = image.convert("RGB")
+
+        image_tensor = self._transform(image).unsqueeze(0).to(device=self._device)
+        backbone_out = self._predictor.forward_image(image_tensor)
+        vision_feats = backbone_out["backbone_fpn"][-self._predictor.num_feature_levels :]
+        vision_pos_embeds = backbone_out["vision_pos_enc"][-self._predictor.num_feature_levels :]
+
+        return _PreprocessedImage(
+            original_width=image.width,
+            original_height=image.height,
+            vision_feats=[x.flatten(2).permute(2, 0, 1) for x in vision_feats],
+            vision_pos_embeds=[x.flatten(2).permute(2, 0, 1) for x in vision_pos_embeds],
+            feat_sizes=[(x.shape[-2], x.shape[-1]) for x in vision_pos_embeds],
+        )
+
+    def _call_predictor(self, *, pp_image: _PreprocessedImage, frame_idx: int, **kwargs) -> dict:
+        out = self._predictor.track_step(
+            current_vision_feats=pp_image.vision_feats,
+            current_vision_pos_embeds=pp_image.vision_pos_embeds,
+            feat_sizes=pp_image.feat_sizes,
+            point_inputs=None,
+            frame_idx=frame_idx,
+            num_frames=frame_idx + 1,
+            **kwargs,
+        )
+
+        return {
+            "maskmem_features": out["maskmem_features"],
+            "maskmem_pos_enc": out["maskmem_pos_enc"][-1:],
+            "pred_masks": fill_holes_in_mask_scores(
+                out["pred_masks"], self._predictor.fill_hole_area
+            ),
+            "obj_ptr": out["obj_ptr"],
+        }
+
+    def _shape_to_mask(
+        self, pp_image: _PreprocessedImage, shape: cvataa.TrackableShape
+    ) -> np.ndarray:
+        if shape.type == "mask":
+            return decode_mask(
+                shape.points,
+                image_width=pp_image.original_width,
+                image_height=pp_image.original_height,
+            )
+
+        if shape.type == "polygon":
+            mask = np.zeros((pp_image.original_height, pp_image.original_width), dtype=np.uint8)
+            points_array = np.array(shape.points, dtype=np.int32).reshape((-1, 2))
+            cv2.fillPoly(mask, [points_array], 1)
+            return mask
+
+        assert False, f"unexpected shape type {shape.type!r}"
+
+    @torch.inference_mode()
+    def init_tracking_state(
+        self,
+        context: cvataa.TrackingFunctionShapeContext,
+        pp_image: _PreprocessedImage,
+        shape: cvataa.TrackableShape,
+    ) -> _TrackingState:
+        mask = torch.from_numpy(self._shape_to_mask(pp_image, shape))
+
+        resized_mask = torch.nn.functional.interpolate(
+            mask.float()[None, None],  # add batch and channel dimensions
+            (self._predictor.image_size, self._predictor.image_size),
+            mode="bilinear",
+            align_corners=False,
+        )
+        resized_mask = (resized_mask >= 0.5).float().to(device=self._device)
+
+        current_out = self._call_predictor(
+            pp_image=pp_image,
+            frame_idx=0,
+            is_init_cond_frame=True,
+            mask_inputs=resized_mask,
+            output_dict={},
+        )
+
+        return _TrackingState(
+            frame_idx=0,
+            predictor_outputs={
+                "cond_frame_outputs": {0: current_out},
+                "non_cond_frame_outputs": collections.OrderedDict(),
+            },
+        )
+
+    def _mask_to_shape(
+        self, context: cvataa.TrackingFunctionShapeContext, mask: torch.Tensor
+    ) -> Optional[cvataa.TrackableShape]:
+        if context.original_shape_type == "mask":
+            return cvataa.TrackableShape(type="mask", points=encode_mask(mask))
+
+        if context.original_shape_type == "polygon":
+            mask_np = np.asarray(mask, dtype=np.uint8)
+            contours, _ = cv2.findContours(mask_np, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
+            if not contours:
+                return None
+
+            largest_contour = max(contours, key=cv2.contourArea)
+            approx_contour = cv2.approxPolyDP(largest_contour, epsilon=1.0, closed=True)
+            if approx_contour.shape[0] < 3:
+                return None
+
+            return cvataa.TrackableShape(type="polygon", points=approx_contour.flatten().tolist())
+
+        assert False, f"unexpected shape type {context.original_shape_type!r}"
+
+    @torch.inference_mode()
+    def track(
+        self,
+        context: cvataa.TrackingFunctionShapeContext,
+        pp_image: _PreprocessedImage,
+        state: _TrackingState,
+    ) -> Optional[cvataa.TrackableShape]:
+        state.frame_idx += 1
+
+        current_out = self._call_predictor(
+            pp_image=pp_image,
+            frame_idx=state.frame_idx,
+            is_init_cond_frame=False,
+            mask_inputs=None,
+            output_dict=state.predictor_outputs,
+        )
+
+        non_cond_frame_outputs = state.predictor_outputs["non_cond_frame_outputs"]
+        non_cond_frame_outputs[state.frame_idx] = current_out
+
+        # discard old outputs as the predictor uses up to num_maskmem elements
+        while len(non_cond_frame_outputs) > self._predictor.num_maskmem:
+            non_cond_frame_outputs.popitem(last=False)
+
+        output_mask = (
+            torch.nn.functional.interpolate(
+                current_out["pred_masks"],
+                size=(pp_image.original_height, pp_image.original_width),
+                align_corners=False,
+                mode="bilinear",
+                antialias=True,
+            )[0, 0]
+            > 0
+        )
+
+        if output_mask.any():
+            return self._mask_to_shape(context, output_mask.cpu())
+        else:
+            return None
+
+
+create = _Sam2Tracker
ai-models/tracker/sam2/requirements.txt

Lines changed: 8 additions & 0 deletions

@@ -0,0 +1,8 @@
+# Copyright (C) CVAT.ai Corporation
+#
+# SPDX-License-Identifier: MIT
+
+cvat_sdk>=2.42
+huggingface_hub
+opencv-python-headless
+sam-2 @ git+https://github.com/facebookresearch/sam2.git@2b90b9f5ceec907a1c18123530e92e794ad901a4

cvat-canvas/src/typescript/canvas.ts

Lines changed: 5 additions & 0 deletions
@@ -23,6 +23,7 @@ interface Canvas {
     html(): HTMLDivElement;
     setup(frameData: any, objectStates: any[], zLayer?: number): void;
     setupIssueRegions(issueRegions: Record<number, { hidden: boolean; points: number[] }>): void;
+    translateFromSVG(points: number[]): number[];
     setupConflictRegions(clientID: number): number[];
     activate(clientID: number | null, attributeID?: number): void;
     highlight(clientIDs: number[] | null, severity: HighlightSeverity | null): void;
@@ -79,6 +80,10 @@ class CanvasImpl implements Canvas {
         this.model.setupIssueRegions(issueRegions);
     }

+    public translateFromSVG(points: number[]): number[] {
+        return this.view.translateFromSVG(points);
+    }
+
     public setupConflictRegions(clientID: number): number[] {
         return this.view.setupConflictRegions(clientID);
     }

cvat-canvas/src/typescript/canvasView.ts

Lines changed: 5 additions & 0 deletions
@@ -45,6 +45,7 @@ import {
 export interface CanvasView {
     html(): HTMLDivElement;
     setupConflictRegions(clientID: number): number[];
+    translateFromSVG(points: number[]): number[];
 }

 export class CanvasViewImpl implements CanvasView, Listener {
@@ -2241,6 +2242,10 @@ export class CanvasViewImpl implements CanvasView, Listener {
         return [cx, cy];
     }

+    public translateFromSVG(point: number[]): number[] {
+        return translateFromSVG(this.content, point);
+    }
+
     private redrawBitmap(): void {
         this.bitmapUpdateReqId++;
         const { bitmapUpdateReqId } = this;

cvat-cli/requirements/base.txt

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-cvat-sdk==2.42.0
+cvat-sdk==2.43.0

 attrs>=24.2.0
 Pillow>=10.3.0

cvat-cli/src/cvat_cli/version.py

Lines changed: 1 addition & 1 deletion
@@ -1 +1 @@
-VERSION = "2.42.0"
+VERSION = "2.43.0"
