diff --git a/.github/workflows/rav1e.yml b/.github/workflows/rav1e.yml index f59560b84f..3bd52c3560 100644 --- a/.github/workflows/rav1e.yml +++ b/.github/workflows/rav1e.yml @@ -12,7 +12,6 @@ on: jobs: rustfmt-clippy: - runs-on: ubuntu-22.04 steps: @@ -193,7 +192,7 @@ jobs: - name: Check extra features if: matrix.toolchain == 'stable' && matrix.conf == 'check-extra-feats' run: | - cargo check --features=check_asm,capi,dump_lookahead_data,serialize,bench --all-targets + cargo check --features=check_asm,capi,dump_lookahead_data,serialize,bench,devel --all-targets - name: Check extra features if: matrix.toolchain == 'stable' && matrix.conf == 'check-unstable-feats' run: | diff --git a/Cargo.toml b/Cargo.toml index 320e7aafa4..29791822c6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -24,6 +24,9 @@ default-run = "rav1e" [features] unstable = [] +# Exposes extra flags for tuning compiler internals. +# Intended to be used by developers to find ideal internal settings. +devel = [] channel-api = ["crossbeam"] decode_test = ["aom-sys"] decode_test_dav1d = ["dav1d-sys"] diff --git a/src/api/config/encoder.rs b/src/api/config/encoder.rs index 7f84d5a081..e5085fcadb 100644 --- a/src/api/config/encoder.rs +++ b/src/api/config/encoder.rs @@ -108,6 +108,38 @@ pub struct EncoderConfig { /// Settings which affect the encoding speed vs. quality trade-off. pub speed_settings: SpeedSettings, + + /// Advanced settings which are intended for use by developers. + /// Non-developers should use the default values. + pub advanced_flags: AdvancedTuning, +} + +/// Advanced settings that are intended for use by developers +/// for tuning compiler internals. +#[derive(Debug, Clone, Copy, Serialize, Deserialize)] +pub struct AdvancedTuning { + /// Controls the strength of the deblock filter, as a multiplier to the default. + pub deblock_strength: f32, + /// Controls the sharpness of the deblock filter. Accepts a value from 0-7. + pub deblock_sharpness: u8, + /// Controls the ratio between intra frame and inter frame quantizers, as a multiplier. + /// Default is 1.0. Higher values create a higher quantizer difference, while lower values + /// create a lower quantizer difference. A value of 0.0 would mean that I and P quantizers + /// are the same. + pub ip_qidx_ratio: f32, + /// Controls the strength of temporal RDO, as a multiplier to the default. + pub temporal_rdo_strength: f32, +} + +impl Default for AdvancedTuning { + fn default() -> Self { + Self { + deblock_strength: 1.0, + deblock_sharpness: 0, + ip_qidx_ratio: 1.0, + temporal_rdo_strength: 1.0, + } + } } /// Default preset for `EncoderConfig`: it is a balance between quality and @@ -163,6 +195,7 @@ impl EncoderConfig { tile_rows: 0, tiles: 0, speed_settings: SpeedSettings::from_preset(speed), + advanced_flags: Default::default(), } } diff --git a/src/api/test.rs b/src/api/test.rs index 072562631f..b82f3311fb 100644 --- a/src/api/test.rs +++ b/src/api/test.rs @@ -2164,6 +2164,7 @@ fn log_q_exp_overflow() { }, ..Default::default() }, + advanced_flags: Default::default(), }; let config = Config::new().with_encoder_config(enc).with_threads(1); @@ -2240,6 +2241,7 @@ fn guess_frame_subtypes_assert() { }, ..Default::default() }, + advanced_flags: Default::default(), }; let config = Config::new().with_encoder_config(enc).with_threads(1); diff --git a/src/bin/common.rs b/src/bin/common.rs index 3e0d5aa8c4..f43f760d97 100644 --- a/src/bin/common.rs +++ b/src/bin/common.rs @@ -242,10 +242,34 @@ pub struct CliOptions { #[clap(long, short, value_parser, help_heading = "DEBUGGING")] pub reconstruction: Option, + /// Controls the strength of the deblock filter, as a multiplier to the default. + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub deblock_strength: f32, + /// Controls the sharpness of the deblock filter. Accepts a value from 0-7. + #[clap(long, value_parser = clap::value_parser!(u8).range(0..=7), default_value_t=0, help_heading = "ADVANCED")] + pub deblock_sharpness: u8, + /// Controls the ratio between intra frame and inter frame quantizers, as a multiplier. + /// Higher values create a higher quantizer difference, while lower values + /// create a lower quantizer difference. A value of 0.0 would mean that I and P quantizers + /// are the same. + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub ip_qidx_ratio: f32, + /// Controls the strength of temporal RDO, as a multiplier to the default. + #[clap(long, value_parser = positive_float, default_value_t=1.0f32, help_heading = "ADVANCED")] + pub temporal_rdo_strength: f32, + #[clap(subcommand)] pub command: Option, } +fn positive_float(input: &str) -> Result { + let value = input.parse::().map_err(|e| e.to_string())?; + if value < 0.0 { + return Err("Value must not be negative".to_string()); + } + Ok(value) +} + fn get_version() -> &'static str { static VERSION_STR: Lazy = Lazy::new(|| { format!( @@ -299,7 +323,7 @@ pub enum Commands { #[clap(long, short, value_parser)] save_config: Option, /// Load the encoder configuration from a toml file - #[clap(long, short, value_parser, conflicts_with = "save-config")] + #[clap(long, short, value_parser, conflicts_with = "save_config")] load_config: Option, }, } @@ -484,6 +508,16 @@ pub fn parse_cli() -> Result { }) } +#[cfg(feature = "devel")] +const fn parse_advanced_flags(cli: &CliOptions) -> AdvancedTuning { + AdvancedTuning { + deblock_strength: cli.deblock_strength, + deblock_sharpness: cli.deblock_sharpness, + ip_qidx_ratio: cli.ip_qidx_ratio, + temporal_rdo_strength: cli.temporal_rdo_strength, + } +} + fn parse_config(matches: &CliOptions) -> Result { let maybe_quantizer = matches.quantizer; let maybe_bitrate = matches.bitrate; @@ -674,5 +708,10 @@ fn parse_config(matches: &CliOptions) -> Result { cfg.speed_settings.scene_detection_mode = SceneDetectionSpeed::None; } + #[cfg(feature = "devel")] + { + cfg.advanced_flags = parse_advanced_flags(matches); + } + Ok(cfg) } diff --git a/src/encoder.rs b/src/encoder.rs index dbf8fd97a4..0af13761ec 100644 --- a/src/encoder.rs +++ b/src/encoder.rs @@ -501,7 +501,7 @@ impl FrameState { cdfs: CDFContext::new(0), context_update_tile_id: 0, max_tile_size_bytes: 0, - deblock: Default::default(), + deblock: DeblockState::new(&fi.config), segmentation: Default::default(), restoration: rs, frame_me_stats: FrameMEStats::new_arc_array(fi.w_in_b, fi.h_in_b), @@ -543,6 +543,19 @@ pub struct DeblockState { pub block_delta_multi: bool, } +impl DeblockState { + pub fn new(config: &EncoderConfig) -> Self { + let mut state = DeblockState { ..Default::default() }; + for level in &mut state.levels { + *level = ((*level as f32) * config.advanced_flags.deblock_strength) + .min(MAX_LOOP_FILTER as f32) + .round() as u8; + } + state.sharpness = config.advanced_flags.deblock_sharpness; + state + } +} + impl Default for DeblockState { fn default() -> Self { DeblockState { diff --git a/src/fuzzing.rs b/src/fuzzing.rs index aab9abe059..2d767da8a2 100644 --- a/src/fuzzing.rs +++ b/src/fuzzing.rs @@ -257,6 +257,7 @@ impl Arbitrary for ArbitraryEncoder { switch_frame_interval: u.int_in_range(0..=3)?, tune: *u.choose(&[Tune::Psnr, Tune::Psychovisual])?, film_grain_params: None, + advanced_flags: Default::default(), }; let frame_count = diff --git a/src/rate.rs b/src/rate.rs index e7633777a1..d0ee940b28 100644 --- a/src/rate.rs +++ b/src/rate.rs @@ -703,11 +703,12 @@ impl RCState { pub(crate) fn select_first_pass_qi( &self, bit_depth: usize, fti: usize, chroma_sampling: ChromaSampling, + ft_ratio: f64, ) -> QuantizerParameters { // Adjust the quantizer for the frame type, result is Q57: let log_q = ((self.pass1_log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) - + DQP_Q57[fti]; + + (DQP_Q57[fti] as f64 * ft_ratio) as i64; QuantizerParameters::new_from_log_q( self.pass1_log_base_q, log_q, @@ -723,14 +724,20 @@ impl RCState { &self, ctx: &ContextInner, output_frameno: u64, fti: usize, maybe_prev_log_base_q: Option, log_isqrt_mean_scale: i64, ) -> QuantizerParameters { + let ft_ratio = ctx.config.advanced_flags.ip_qidx_ratio as f64; + // Is rate control active? if self.target_bitrate <= 0 { // Rate control is not active. // Derive quantizer directly from frame type. let bit_depth = ctx.config.bit_depth; let chroma_sampling = ctx.config.chroma_sampling; - let (log_base_q, log_q) = - Self::calc_flat_quantizer(ctx.config.quantizer as u8, bit_depth, fti); + let (log_base_q, log_q) = Self::calc_flat_quantizer( + ctx.config.quantizer as u8, + bit_depth, + fti, + ft_ratio, + ); QuantizerParameters::new_from_log_q( log_base_q, log_q, @@ -752,6 +759,7 @@ impl RCState { ctx.config.bit_depth, fti, ctx.config.chroma_sampling, + ft_ratio, ); } // Second pass of 2-pass mode: we know exactly how much of each frame @@ -925,7 +933,7 @@ impl RCState { // Modulate base quantizer by frame type. let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[ftj] as i64) - + DQP_Q57[ftj]; + + (DQP_Q57[ftj] as f64 * ft_ratio) as i64; // All the fields here are Q57 except for the exponent, which is // Q6. bits += (nframes[ftj] as i64) @@ -959,7 +967,7 @@ impl RCState { // Modulate base quantizer by frame type. let mut log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) - + DQP_Q57[fti]; + + (DQP_Q57[fti] as f64 * ft_ratio) as i64; // The above allocation looks only at the total rate we'll accumulate // in the next reservoir_frame_delay frames. // However, we could overflow the bit reservoir on the very next @@ -1019,14 +1027,22 @@ impl RCState { } if let Some(qi_max) = self.maybe_ac_qi_max { - let (max_log_base_q, max_log_q) = - Self::calc_flat_quantizer(qi_max, ctx.config.bit_depth, fti); + let (max_log_base_q, max_log_q) = Self::calc_flat_quantizer( + qi_max, + ctx.config.bit_depth, + fti, + ft_ratio, + ); log_base_q = cmp::min(log_base_q, max_log_base_q); log_q = cmp::min(log_q, max_log_q); } if self.ac_qi_min > 0 { - let (min_log_base_q, min_log_q) = - Self::calc_flat_quantizer(self.ac_qi_min, ctx.config.bit_depth, fti); + let (min_log_base_q, min_log_q) = Self::calc_flat_quantizer( + self.ac_qi_min, + ctx.config.bit_depth, + fti, + ft_ratio, + ); log_base_q = cmp::max(log_base_q, min_log_base_q); log_q = cmp::max(log_q, min_log_q); } @@ -1044,7 +1060,7 @@ impl RCState { // Computes a quantizer directly from the frame type and base quantizer index, // without consideration for rate control. fn calc_flat_quantizer( - base_qi: u8, bit_depth: usize, fti: usize, + base_qi: u8, bit_depth: usize, fti: usize, ft_ratio: f64, ) -> (i64, i64) { // TODO: Rename "quantizer" something that indicates it is a quantizer // index, and move it somewhere more sensible (or choose a better way to @@ -1063,7 +1079,7 @@ impl RCState { let log_base_q = (log_ac_q + log_dc_q + 1) >> 1; // Adjust the quantizer for the frame type, result is Q57: let log_q = ((log_base_q + (1i64 << 11)) >> 12) * (MQP_Q12[fti] as i64) - + DQP_Q57[fti]; + + (DQP_Q57[fti] as f64 * ft_ratio) as i64; (log_base_q, log_q) } diff --git a/src/rdo.rs b/src/rdo.rs index 553d6f9d75..aa0a33a4b6 100644 --- a/src/rdo.rs +++ b/src/rdo.rs @@ -455,6 +455,7 @@ pub fn distortion_scale( let coded_data = fi.coded_frame_data.as_ref().unwrap(); coded_data.distortion_scales[y * coded_data.w_in_imp_b + x] + .strength_adjusted(fi.config.advanced_flags.temporal_rdo_strength as f64) } /// # Panics @@ -504,6 +505,7 @@ pub fn spatiotemporal_scale( .sum::(); } DistortionScale(((sum + (den >> 1)) / den) as u32) + .strength_adjusted(fi.config.advanced_flags.temporal_rdo_strength as f64) } pub fn distortion_scale_for( @@ -617,6 +619,22 @@ impl DistortionScale { pub const fn mul_u64(self, dist: u64) -> u64 { (self.0 as u64 * dist + (1 << Self::SHIFT >> 1)) >> Self::SHIFT } + + #[inline] + #[cfg(feature = "devel")] + pub fn strength_adjusted(self, strength: f64) -> Self { + let diff = 1.0 - f64::from(self); + let add = diff * strength; + DistortionScale::from((1.0 + add).max(0.0)) + } + + #[inline(always)] + #[cfg(not(feature = "devel"))] + pub fn strength_adjusted(self, _strength: f64) -> Self { + // If we aren't using a devel build, just return self + // so we do not add any performance cost. + self + } } impl std::ops::Mul for DistortionScale {