Skip to content

Commit 5082c47

Browse files
mickqianshuaills
authored and committed
Revert "feat: replace Decord with video_reader-rs" (sgl-project#8077)
1 parent 0132449 commit 5082c47

File tree

6 files changed

+21
-16
lines changed

6 files changed

+21
-16
lines changed

python/pyproject.toml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@ runtime_common = [
2121
"build",
2222
"compressed-tensors",
2323
"datasets",
24-
"video-reader-rs",
2524
"fastapi",
2625
"hf_transfer",
2726
"huggingface_hub",

python/sglang/check_env.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -47,7 +47,7 @@ def is_cuda_v2():
4747
"tiktoken",
4848
"anthropic",
4949
"litellm",
50-
"video-reader-rs",
50+
"decord",
5151
]
5252

5353

python/sglang/srt/multimodal/processors/base_processor.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -206,7 +206,7 @@ def get_estimated_frames_list(self, image_data):
206206
estimate the total frame count from all visual input
207207
"""
208208
# Lazy import because decord is not available on some arm platforms.
209-
from video_reader import PyVideoReader, cpu
209+
from decord import VideoReader, cpu
210210

211211
# Before processing inputs
212212
if not image_data or len(image_data) == 0:
@@ -216,7 +216,7 @@ def get_estimated_frames_list(self, image_data):
216216
if isinstance(image, str) and image.startswith("video:"):
217217
path = image[len("video:") :]
218218
# Estimate frames for the video
219-
vr = PyVideoReader(path, threads=0)
219+
vr = VideoReader(path, ctx=cpu(0))
220220
num_frames = len(vr)
221221
else:
222222
# For images, each contributes one frame

python/sglang/srt/multimodal/processors/internvl.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -150,15 +150,15 @@ def get_index(bound, fps, max_frame, first_idx=0, num_segments=32):
150150
def load_video(video_path, bound=None, input_size=448, max_num=1, num_segments=32):
151151
vr = VideoReader(video_path, ctx=cpu(0), num_threads=1)
152152
max_frame = len(vr) - 1
153-
fps = float(vr.get_fps())
153+
fps = float(vr.get_avg_fps())
154154

155155
pixel_values_list, num_patches_list = [], []
156156
transform = InternVLImageProcessor.build_transform(input_size=input_size)
157157
frame_indices = InternVLImageProcessor.get_index(
158158
bound, fps, max_frame, first_idx=0, num_segments=num_segments
159159
)
160160
for frame_index in frame_indices:
161-
img = Image.fromarray(vr[frame_index]).convert("RGB")
161+
img = Image.fromarray(vr[frame_index].asnumpy()).convert("RGB")
162162
img = InternVLImageProcessor.dynamic_preprocess(
163163
img, image_size=input_size, use_thumbnail=True, max_num=max_num
164164
)

python/sglang/srt/multimodal/processors/qwen_vl.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -156,10 +156,10 @@ async def preprocess_video(
156156
# vr: VideoReader, image_factor: int = IMAGE_FACTOR
157157
) -> torch.Tensor:
158158
ele = {}
159-
total_frames, video_fps = len(vr), vr.get_fps()
159+
total_frames, video_fps = len(vr), vr.get_avg_fps()
160160
nframes = smart_nframes({}, total_frames=total_frames, video_fps=video_fps)
161161
idx = torch.linspace(0, total_frames - 1, nframes).round().long().tolist()
162-
video = vr.get_batch(idx)
162+
video = vr.get_batch(idx).asnumpy()
163163
video = torch.tensor(video).permute(0, 3, 1, 2) # Convert to TCHW format
164164
nframes, _, height, width = video.shape
165165
min_pixels = ele.get("min_pixels", VIDEO_MIN_PIXELS)

python/sglang/srt/utils.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -84,7 +84,6 @@
8484
from torch.profiler import ProfilerActivity, profile, record_function
8585
from torch.utils._contextlib import _DecoratorContextManager
8686
from triton.runtime.cache import FileCacheManager
87-
from video_reader import PyVideoReader
8887

8988
logger = logging.getLogger(__name__)
9089

@@ -758,17 +757,24 @@ def load_image(
758757

759758
def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
760759
# We import decord here to avoid a strange Segmentation fault (core dumped) issue.
761-
from video_reader import PyVideoReader
760+
from decord import VideoReader, cpu, gpu
761+
762+
try:
763+
from decord.bridge import decord_bridge
764+
765+
ctx = gpu(0)
766+
_ = decord_bridge.get_ctx_device(ctx)
767+
except Exception:
768+
ctx = cpu(0)
762769

763-
device = "cuda" if use_gpu and torch.cuda.is_available() else None
764770
tmp_file = None
765771
vr = None
766772
try:
767773
if isinstance(video_file, bytes):
768774
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
769775
tmp_file.write(video_file)
770776
tmp_file.close()
771-
vr = PyVideoReader(tmp_file.name, device=device, threads=0)
777+
vr = VideoReader(tmp_file.name, ctx=ctx)
772778
elif isinstance(video_file, str):
773779
if video_file.startswith(("http://", "https://")):
774780
timeout = int(os.getenv("REQUEST_TIMEOUT", "10"))
@@ -778,22 +784,22 @@ def load_video(video_file: Union[str, bytes], use_gpu: bool = True):
778784
for chunk in response.iter_content(chunk_size=8192):
779785
tmp_file.write(chunk)
780786
tmp_file.close()
781-
vr = PyVideoReader(tmp_file.name, device=device, threads=0)
787+
vr = VideoReader(tmp_file.name, ctx=ctx)
782788
elif video_file.startswith("data:"):
783789
_, encoded = video_file.split(",", 1)
784790
video_bytes = base64.b64decode(encoded)
785791
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
786792
tmp_file.write(video_bytes)
787793
tmp_file.close()
788-
vr = PyVideoReader(tmp_file.name, device=device, threads=0)
794+
vr = VideoReader(tmp_file.name, ctx=ctx)
789795
elif os.path.isfile(video_file):
790-
vr = PyVideoReader(video_file, device=device, threads=0)
796+
vr = VideoReader(video_file, ctx=ctx)
791797
else:
792798
video_bytes = base64.b64decode(video_file)
793799
tmp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp4")
794800
tmp_file.write(video_bytes)
795801
tmp_file.close()
796-
vr = PyVideoReader(tmp_file.name, device=device, threads=0)
802+
vr = VideoReader(tmp_file.name, ctx=ctx)
797803
else:
798804
raise ValueError(f"Unsupported video input type: {type(video_file)}")
799805

0 commit comments

Comments
 (0)