% Conference paper, ICIP 2025 proceedings (conference dates 14-17 Sep 2025).
% Authors are stored in "Last, First" form; the note field carries the
% publisher copyright line and the conference details from the original export.
@inproceedings{0b9a80203a764eacbfd8a5bbc1bb7dae,
  author    = {Benjak, Martin and Khan, Saifullah and Chen, {Yi Hsin} and Peng, {Wen Hsiao} and Ostermann, J{\"o}rn},
  title     = {Learned Hybrid Video Coding for Human Perception and Multiple Machine Vision Tasks},
  booktitle = {2025 {IEEE} International Conference on Image Processing, {ICIP} 2025 - Proceedings},
  series    = {Proceedings - International Conference on Image Processing, {ICIP}},
  publisher = {IEEE Computer Society},
  address   = {United States},
  pages     = {1996--2001},
  year      = {2025},
  month     = sep,
  day       = {14},
  doi       = {10.1109/ICIP55913.2025.11084300},
  isbn      = {979-8-3315-2380-0},
  language  = {English},
  keywords  = {feature compression, video coding, Video coding for machines},
  abstract  = {In this work, we present a learned multi-task video codec that is optimized for human and machine vision. The codec consists of an encoder that maps images from the pixel domain to a latent representation and multiple decoders that map the latent to either an image for human consumption or multiple task-specific features for different machine vision tasks. This allows a single bitstream to be used for multiple tasks while also reducing the decoder complexity for machine vision tasks. Unlike most learned codecs, our method performs inter-coding at the latent level instead of the pixel domain. Experiments show that the proposed method achieves a compression performance for machine vision tasks comparable to other multi-task codecs designed for machine vision only, while also providing video reconstruction.},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE. 32nd IEEE International Conference on Image Processing, ICIP 2025; Conference date: 14-09-2025 through 17-09-2025},
}