Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Use dyn dispatch to reduce monomorphization bloat (~177KB savings)
Add a thin generic wrapper (#[inline(always)]) around #[inline(never)]
non-generic implementations to prevent monomorphization of the heavy
process functions for every input type. The public API remains generic
for ergonomics, but delegates to non-generic internal functions that use
dynamic dispatch.

Changes:
- decoder.rs: Add process_dyn/skip_frame_dyn with #[inline(never)]
- process.rs: Use &mut dyn JxlBitstreamInput
- codestream_parser/mod.rs: Use &mut dyn JxlBitstreamInput
- box_parser.rs: Use &mut dyn JxlBitstreamInput

Binary size impact (with 3 input types):
- Before: 4,065 KB (.text), 3x86KB process copies
- After:  3,883 KB (.text), 1x86KB process copy
- Savings: ~177 KB
  • Loading branch information
hjanuschka committed Dec 2, 2025
commit b9e84539f1f9b768ee4213fcb7ece50eff9638b4
45 changes: 43 additions & 2 deletions jxl/src/api/decoder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -83,9 +83,20 @@ impl JxlDecoder<Initialized> {
Self::wrap_inner(JxlDecoderInner::new(options))
}

/// Generic entry point that accepts any [`JxlBitstreamInput`] implementation.
///
/// The input is coerced to `&mut dyn JxlBitstreamInput` and forwarded to the
/// non-generic `process_dyn`, so only this thin shim is instantiated per
/// input type while the actual work is compiled once.
///
/// Consumes the decoder and returns whatever `process_dyn` returns: a
/// `ProcessingResult` over `JxlDecoder<WithImageInfo>` and `Self`, or an error.
#[inline(always)]
pub fn process(
    self,
    input: &mut impl JxlBitstreamInput,
) -> Result<ProcessingResult<JxlDecoder<WithImageInfo>, Self>> {
    // `self` is only moved into `process_dyn`, so the binding needs no `mut`.
    self.process_dyn(input)
}

/// Non-generic implementation to avoid monomorphization bloat
#[inline(never)]
fn process_dyn(
mut self,
input: &mut dyn JxlBitstreamInput,
) -> Result<ProcessingResult<JxlDecoder<WithImageInfo>, Self>> {
let inner_result = self.inner.process(input, None)?;
Ok(self.map_inner_processing_result(inner_result))
Expand Down Expand Up @@ -124,9 +135,20 @@ impl JxlDecoder<WithImageInfo> {
self.inner.set_pixel_format(pixel_format);
}

/// Generic entry point that accepts any [`JxlBitstreamInput`] implementation.
///
/// The input is coerced to `&mut dyn JxlBitstreamInput` and forwarded to the
/// non-generic `process_dyn`, so only this thin shim is instantiated per
/// input type while the actual work is compiled once.
///
/// Consumes the decoder and returns whatever `process_dyn` returns: a
/// `ProcessingResult` over `JxlDecoder<WithFrameInfo>` and `Self`, or an error.
#[inline(always)]
pub fn process(
    self,
    input: &mut impl JxlBitstreamInput,
) -> Result<ProcessingResult<JxlDecoder<WithFrameInfo>, Self>> {
    // `self` is only moved into `process_dyn`, so the binding needs no `mut`.
    self.process_dyn(input)
}

/// Non-generic implementation to avoid monomorphization bloat
#[inline(never)]
fn process_dyn(
mut self,
input: &mut dyn JxlBitstreamInput,
) -> Result<ProcessingResult<JxlDecoder<WithFrameInfo>, Self>> {
let inner_result = self.inner.process(input, None)?;
Ok(self.map_inner_processing_result(inner_result))
Expand All @@ -144,9 +166,18 @@ impl JxlDecoder<WithImageInfo> {

impl JxlDecoder<WithFrameInfo> {
/// Skip the current frame.
///
/// Generic over the input type for caller convenience; the input is coerced
/// to `&mut dyn JxlBitstreamInput` and forwarded to the non-generic
/// `skip_frame_dyn`, so only this thin shim is instantiated per input type.
///
/// Consumes the decoder and returns whatever `skip_frame_dyn` returns: a
/// `ProcessingResult` over `JxlDecoder<WithImageInfo>` and `Self`, or an error.
#[inline(always)]
pub fn skip_frame(
    self,
    input: &mut impl JxlBitstreamInput,
) -> Result<ProcessingResult<JxlDecoder<WithImageInfo>, Self>> {
    // `self` is only moved into `skip_frame_dyn`, so the binding needs no `mut`.
    self.skip_frame_dyn(input)
}

#[inline(never)]
fn skip_frame_dyn(
mut self,
input: &mut dyn JxlBitstreamInput,
) -> Result<ProcessingResult<JxlDecoder<WithImageInfo>, Self>> {
let inner_result = self.inner.process(input, None)?;
Ok(self.map_inner_processing_result(inner_result))
Expand All @@ -168,9 +199,19 @@ impl JxlDecoder<WithFrameInfo> {

/// Guarantees to populate exactly the appropriate part of the buffers.
/// Wants one buffer for each non-ignored pixel type, i.e. color channels and each extra channel.
pub fn process<In: JxlBitstreamInput>(
#[inline(always)]
pub fn process(
    self,
    input: &mut impl JxlBitstreamInput,
    buffers: &mut [JxlOutputBuffer<'_>],
) -> Result<ProcessingResult<JxlDecoder<WithImageInfo>, Self>> {
    // Thin generic shim: coerce the concrete input to a trait object and hand
    // everything to the non-generic `process_dyn`, so the heavy body is not
    // duplicated per input type. `self` is only moved, hence no `mut` binding.
    self.process_dyn(input, buffers)
}

#[inline(never)]
fn process_dyn(
mut self,
input: &mut In,
input: &mut dyn JxlBitstreamInput,
buffers: &mut [JxlOutputBuffer<'_>],
) -> Result<ProcessingResult<JxlDecoder<WithImageInfo>, Self>> {
let inner_result = self.inner.process(input, Some(buffers))?;
Expand Down
7 changes: 2 additions & 5 deletions jxl/src/api/inner/box_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,8 @@ impl BoxParser {
// Returns the number of codestream bytes that will be available to be read after this call,
// including any bytes in self.box_buffer.
// Might return `u64::MAX`, indicating that the rest of the file is codestream.
pub(super) fn get_more_codestream(
&mut self,
input: &mut impl JxlBitstreamInput,
) -> Result<u64> {
// TODO(veluca): consider moving most of this function into a function that is not generic.
#[inline(never)]
pub(super) fn get_more_codestream(&mut self, input: &mut dyn JxlBitstreamInput) -> Result<u64> {
loop {
match self.state.clone() {
ParseState::SignatureNeeded => {
Expand Down
5 changes: 3 additions & 2 deletions jxl/src/api/inner/codestream_parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -129,10 +129,11 @@ impl CodestreamParser {
.set_use_simple_pipeline(u);
}

pub(super) fn process<In: JxlBitstreamInput>(
#[inline(never)]
pub(super) fn process(
&mut self,
box_parser: &mut BoxParser,
input: &mut In,
input: &mut dyn JxlBitstreamInput,
decode_options: &JxlDecoderOptions,
mut output_buffers: Option<&mut [JxlOutputBuffer]>,
) -> Result<()> {
Expand Down
5 changes: 3 additions & 2 deletions jxl/src/api/inner/process.rs
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,10 @@ impl JxlDecoderInner {
/// file/frame header, or finished decoding a frame).
/// If called when decoding a frame with `None` for buffers, the frame will still be read,
/// but pixel data will not be produced.
pub fn process<In: JxlBitstreamInput>(
#[inline(never)]
pub fn process(
&mut self,
input: &mut In,
input: &mut dyn JxlBitstreamInput,
buffers: Option<&mut [JxlOutputBuffer]>,
) -> Result<ProcessingResult<(), ()>> {
ProcessingResult::new(self.codestream_parser.process(
Expand Down
Loading