From 65fd63c22a5527ca5e42d721c3a76b5b0f32207e Mon Sep 17 00:00:00 2001
From: Luni-4
Date: Fri, 22 Nov 2019 09:34:33 +0100
Subject: [PATCH] Parallelize frame processing

---
 Cargo.lock                         |   1 +
 av_metrics/Cargo.toml              |   3 +-
 av_metrics/src/lib.rs              |   2 +
 av_metrics/src/video/ciede/mod.rs  |  24 +++++--
 av_metrics/src/video/decode/mod.rs |   2 +-
 av_metrics/src/video/decode/y4m.rs |   2 +-
 av_metrics/src/video/mod.rs        | 108 +++++++++++++++++++++--------
 av_metrics/src/video/psnr.rs       |  16 +++--
 av_metrics/src/video/psnr_hvs.rs   |  38 ++++++----
 av_metrics/src/video/ssim.rs       |  74 +++++++++++++-------
 10 files changed, 192 insertions(+), 78 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7fa62ea..968c204 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -39,6 +39,7 @@ dependencies = [
  "itertools 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)",
  "lab 0.7.2 (registry+https://github.com/rust-lang/crates.io-index)",
  "num-traits 0.2.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "rayon 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
  "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
  "y4m 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
 ]
diff --git a/av_metrics/Cargo.toml b/av_metrics/Cargo.toml
index 4ab11ef..5c4a015 100644
--- a/av_metrics/Cargo.toml
+++ b/av_metrics/Cargo.toml
@@ -13,6 +13,7 @@ itertools = "0.8.1"
 lab = "0.7.2"
 num-traits = "0.2"
 serde = { version = "1", features = ["derive"], optional = true }
+rayon = { version = "1.2", optional = true }
 y4m = { version = "0.4", optional = true }
 
 [dev-dependencies]
@@ -20,7 +21,7 @@ criterion = "0.3"
 
 [features]
 default = ["y4m-decode"]
-decode = []
+decode = ["rayon"]
 y4m-decode = ["y4m", "decode"]
 bench = []
 
diff --git a/av_metrics/src/lib.rs b/av_metrics/src/lib.rs
index 67e9ca0..325792a 100644
--- a/av_metrics/src/lib.rs
+++ b/av_metrics/src/lib.rs
@@ -12,6 +12,8 @@ extern crate err_derive;
 #[macro_use]
 extern crate itertools;
 
+extern crate rayon;
+
 pub mod video;
 
 /// Possible errors that may occur during processing of a metric.
diff --git a/av_metrics/src/video/ciede/mod.rs b/av_metrics/src/video/ciede/mod.rs
index b36db77..420d7bd 100644
--- a/av_metrics/src/video/ciede/mod.rs
+++ b/av_metrics/src/video/ciede/mod.rs
@@ -8,7 +8,9 @@
 #[cfg(feature = "decode")]
 use crate::video::decode::Decoder;
 use crate::video::pixel::{CastFromPrimitive, Pixel};
+use crate::video::ParallelMethod;
 use crate::video::{FrameInfo, VideoMetric};
+use crate::MetricsError;
 use std::f64;
 
 mod rgbtolab;
@@ -54,7 +56,10 @@ pub fn calculate_frame_ciede<T: Pixel>(
     frame1: &FrameInfo<T>,
     frame2: &FrameInfo<T>,
 ) -> Result<f64, Box<dyn Error>> {
-    Ciede2000::default().process_frame(frame1, frame2)
+    match Ciede2000::default().process_frame(frame1, frame2) {
+        Ok((val, _)) => Ok(val),
+        Err(val) => Err(val.into()),
+    }
 }
 
 /// Calculate the CIEDE2000 metric between two video frames. Higher is better.
@@ -67,7 +72,10 @@ pub fn calculate_frame_ciede_nosimd<T: Pixel>(
     frame1: &FrameInfo<T>,
     frame2: &FrameInfo<T>,
 ) -> Result<f64, Box<dyn Error>> {
-    (Ciede2000 { use_simd: false }).process_frame(frame1, frame2)
+    match (Ciede2000 { use_simd: false }).process_frame(frame1, frame2) {
+        Ok((val, _)) => Ok(val),
+        Err(val) => Err(val.into()),
+    }
 }
 
 struct Ciede2000 {
@@ -85,10 +93,10 @@ impl VideoMetric for Ciede2000 {
     type VideoResult = f64;
 
     fn process_frame<T: Pixel>(
-        &mut self,
+        &self,
         frame1: &FrameInfo<T>,
         frame2: &FrameInfo<T>,
-    ) -> Result<Self::FrameResult, Box<dyn Error>> {
+    ) -> Result<(Self::FrameResult, Option<f64>), MetricsError> {
         frame1.can_compare(&frame2)?;
 
         let dec = frame1.chroma_sampling.get_decimation().unwrap_or((1, 1));
@@ -123,7 +131,7 @@ impl VideoMetric for Ciede2000 {
         * (delta_e_vec.iter().map(|x| *x as f64).sum::<f64>()
             / ((y_width * y_height) as f64))
         .log10();
-    Ok(score.min(100.))
+    Ok((score.min(100.), None))
 }
 
 #[cfg(feature = "decode")]
@@ -133,6 +141,12 @@ impl VideoMetric for Ciede2000 {
     ) -> Result<Self::VideoResult, Box<dyn Error>> {
         Ok(metrics.iter().copied().sum::<f64>() / metrics.len() as f64)
     }
+
+    fn which_method(&self) -> ParallelMethod {
+        ParallelMethod::Ciede
+    }
+
+    fn set_cweight(&mut self, _cweight: f64) {}
 }
 
 // Arguments for delta e
diff --git a/av_metrics/src/video/decode/mod.rs b/av_metrics/src/video/decode/mod.rs
index 2669212..0656470 100644
--- a/av_metrics/src/video/decode/mod.rs
+++ b/av_metrics/src/video/decode/mod.rs
@@ -12,7 +12,7 @@ pub use self::y4m::*;
 /// Currently, y4m decoding support using the `y4m` crate is built-in
 /// to this crate. This trait is extensible so users may implement
 /// their own decoders.
-pub trait Decoder {
+pub trait Decoder: Send {
     /// Read the next frame from the input video.
     ///
     /// Expected to return `Err` if the end of the video is reached.
diff --git a/av_metrics/src/video/decode/y4m.rs b/av_metrics/src/video/decode/y4m.rs
index 39b8381..6d85a50 100644
--- a/av_metrics/src/video/decode/y4m.rs
+++ b/av_metrics/src/video/decode/y4m.rs
@@ -38,7 +38,7 @@ fn copy_from_raw_u8<T: Pixel>(source: &[u8]) -> Vec<T> {
     }
 }
 
-impl<R: Read> Decoder for y4m::Decoder<'_, R> {
+impl<R: Read + Send> Decoder for y4m::Decoder<'_, R> {
     fn read_video_frame<T: Pixel>(&mut self) -> Result<FrameInfo<T>, ()> {
         let bit_depth = self.get_bit_depth();
         let (chroma_sampling, chroma_sample_pos) = get_chroma_sampling(self);
diff --git a/av_metrics/src/video/mod.rs b/av_metrics/src/video/mod.rs
index 375f624..b61adf3 100644
--- a/av_metrics/src/video/mod.rs
+++ b/av_metrics/src/video/mod.rs
@@ -9,7 +9,9 @@ pub mod psnr_hvs;
 pub mod ssim;
 
 use crate::MetricsError;
+use rayon::prelude::*;
 use std::error::Error;
+use std::iter;
 
 #[cfg(feature = "decode")]
 pub use decode::*;
@@ -185,6 +187,44 @@ pub struct PlanarMetrics {
     pub avg: f64,
 }
 
+/// Establishes a different parallel method according to the metric
+#[derive(Copy, Clone, Debug, PartialEq)]
+pub enum ParallelMethod {
+    /// Method used for Psnr
+    Psnr,
+    /// Method used for Ciede
+    Ciede,
+    /// Method used for all the other metrics
+    Others,
+}
+
+macro_rules! process_video {
+    ($self:ident, $decoder1:ident, $decoder2:ident, $frame_limit:ident, $type:ident, $ret:ident) => {
+        $ret = iter::from_fn(|| $decoder1.read_video_frame::<$type>().ok())
+            .zip(iter::from_fn(|| $decoder2.read_video_frame::<$type>().ok()))
+            .take($frame_limit.unwrap_or(std::usize::MAX))
+            .par_bridge()
+            .try_fold(
+                || vec![],
+                |mut acc, (frame1, frame2)| {
+                    $self
+                        .process_frame(&frame1, &frame2)
+                        .map(|(metrics, cweight)| {
+                            acc.push((metrics, cweight));
+                            acc
+                        })
+                },
+            )
+            .try_reduce(
+                || vec![],
+                |mut a, b| {
+                    a.extend_from_slice(&b);
+                    Ok(a)
+                },
+            )?;
+    };
+}
+
 trait VideoMetric {
     type FrameResult;
     type VideoResult;
@@ -200,55 +240,63 @@ trait VideoMetric {
     fn process_video<D: Decoder>(
         decoder1: &mut D,
         decoder2: &mut D,
         frame_limit: Option<usize>,
-    ) -> Result<Self::VideoResult, Box<dyn Error>> {
+    ) -> Result<Self::VideoResult, Box<dyn Error>>
+    where
+        Self: std::marker::Send,
+        Self: std::marker::Sync,
+        Self::FrameResult: std::marker::Send,
+        Self::FrameResult: std::clone::Clone,
+    {
         if decoder1.get_bit_depth() != decoder2.get_bit_depth() {
             return Err(Box::new(MetricsError::InputMismatch {
                 reason: "Bit depths do not match",
             }));
         }
-        let mut metrics = Vec::with_capacity(frame_limit.unwrap_or(0));
-        let mut frame_no = 0;
-        while frame_limit.map(|limit| limit > frame_no).unwrap_or(true) {
-            if decoder1.get_bit_depth() > 8 {
-                let frame1 = decoder1.read_video_frame::<u16>();
-                let frame2 = decoder2.read_video_frame::<u16>();
-                if let Ok(frame1) = frame1 {
-                    if let Ok(frame2) = frame2 {
-                        metrics.push(self.process_frame(&frame1, &frame2)?);
-                        frame_no += 1;
-                        continue;
-                    }
-                }
-            } else {
-                let frame1 = decoder1.read_video_frame::<u8>();
-                let frame2 = decoder2.read_video_frame::<u8>();
-                if let Ok(frame1) = frame1 {
-                    if let Ok(frame2) = frame2 {
-                        metrics.push(self.process_frame(&frame1, &frame2)?);
-                        frame_no += 1;
-                        continue;
-                    }
-                }
-            }
-            // At end of video
-            break;
+        let metrics_values: Vec<(Self::FrameResult, Option<f64>)>;
+        if decoder1.get_bit_depth() > 8 {
+            process_video!(self, decoder1, decoder2, frame_limit, u16, metrics_values);
+        } else {
+            process_video!(self, decoder1, decoder2, frame_limit, u8, metrics_values);
         }
-        if frame_no == 0 {
+
+        if metrics_values.is_empty() {
             return Err(MetricsError::UnsupportedInput {
                 reason: "No readable frames found in one or more input files",
            }
            .into());
         }
+        let metrics = metrics_values.iter().fold(vec![], |mut acc, v| {
+            let (metric, _) = v;
+            acc.push(metric.clone());
+            acc
+        });
+
+        if let ParallelMethod::Others = self.which_method() {
+            let metric_option = metrics_values
+                .iter()
+                .find(|v| {
+                    let (_, cweight) = v;
+                    cweight.is_some()
+                })
+                .unwrap();
+            let (_, cweight) = metric_option;
+            self.set_cweight(cweight.unwrap());
+        }
+
         self.aggregate_frame_results(&metrics)
     }
 
+    fn which_method(&self) -> ParallelMethod;
+
+    fn set_cweight(&mut self, cweight: f64);
+
     fn process_frame<T: Pixel>(
-        &mut self,
+        &self,
         frame1: &FrameInfo<T>,
         frame2: &FrameInfo<T>,
-    ) -> Result<Self::FrameResult, Box<dyn Error>>;
+    ) -> Result<(Self::FrameResult, Option<f64>), MetricsError>;
 
     #[cfg(feature = "decode")]
     fn aggregate_frame_results(
diff --git a/av_metrics/src/video/psnr.rs b/av_metrics/src/video/psnr.rs
index 9c021ce..a614df0 100644
--- a/av_metrics/src/video/psnr.rs
+++ b/av_metrics/src/video/psnr.rs
@@ -8,7 +8,9 @@
 use crate::video::decode::Decoder;
 use crate::video::pixel::CastFromPrimitive;
 use crate::video::pixel::Pixel;
+use crate::video::ParallelMethod;
 use crate::video::{FrameInfo, PlanarMetrics, PlaneData, VideoMetric};
+use crate::MetricsError;
 use std::error::Error;
 
 /// Calculates the PSNR for two videos. Higher is better.
@@ -53,7 +55,7 @@ pub fn calculate_frame_psnr<T: Pixel>(
     frame1: &FrameInfo<T>,
     frame2: &FrameInfo<T>,
 ) -> Result<PlanarMetrics, Box<dyn Error>> {
-    let metrics = Psnr.process_frame(frame1, frame2)?;
+    let (metrics, _) = Psnr.process_frame(frame1, frame2)?;
     Ok(PlanarMetrics {
         y: calculate_psnr(metrics[0]),
         u: calculate_psnr(metrics[1]),
@@ -75,17 +77,17 @@ impl VideoMetric for Psnr {
     type VideoResult = PsnrResults;
 
     fn process_frame<T: Pixel>(
-        &mut self,
+        &self,
         frame1: &FrameInfo<T>,
         frame2: &FrameInfo<T>,
-    ) -> Result<Self::FrameResult, Box<dyn Error>> {
+    ) -> Result<(Self::FrameResult, Option<f64>), MetricsError> {
         frame1.can_compare(&frame2)?;
 
         let bit_depth = frame1.bit_depth;
         let y = calculate_plane_psnr_metrics(&frame1.planes[0], &frame2.planes[0], bit_depth);
         let u = calculate_plane_psnr_metrics(&frame1.planes[1], &frame2.planes[1], bit_depth);
         let v = calculate_plane_psnr_metrics(&frame1.planes[2], &frame2.planes[2], bit_depth);
-        Ok([y, u, v])
+        Ok(([y, u, v], None))
     }
 
     #[cfg(feature = "decode")]
@@ -111,6 +113,12 @@ impl VideoMetric for Psnr {
         };
         Ok(PsnrResults { psnr, apsnr })
     }
+
+    fn which_method(&self) -> ParallelMethod {
+        ParallelMethod::Psnr
+    }
+
+    fn set_cweight(&mut self, _cweight: f64) {}
 }
 
 #[derive(Debug, Clone, Copy, Default)]
diff --git a/av_metrics/src/video/psnr_hvs.rs b/av_metrics/src/video/psnr_hvs.rs
index 0ef6413..ac45b3d 100644
--- a/av_metrics/src/video/psnr_hvs.rs
+++ b/av_metrics/src/video/psnr_hvs.rs
@@ -10,7 +10,9 @@
 use crate::video::decode::Decoder;
 use crate::video::pixel::CastFromPrimitive;
 use crate::video::pixel::Pixel;
+use crate::video::ParallelMethod;
 use crate::video::{FrameInfo, PlanarMetrics, PlaneData, VideoMetric};
+use crate::MetricsError;
 use std::error::Error;
 
 /// Calculates the PSNR-HVS score between two videos. Higher is better.
@@ -30,8 +32,8 @@ pub fn calculate_frame_psnr_hvs<T: Pixel>(
     frame1: &FrameInfo<T>,
     frame2: &FrameInfo<T>,
 ) -> Result<PlanarMetrics, Box<dyn Error>> {
-    let mut processor = PsnrHvs::default();
-    let result = processor.process_frame(frame1, frame2)?;
+    let processor = PsnrHvs::default();
+    let (result, _) = processor.process_frame(frame1, frame2)?;
     let cweight = processor.cweight.unwrap();
     Ok(PlanarMetrics {
         y: log10_convert(result.y, 1.0),
@@ -56,26 +58,30 @@ impl VideoMetric for PsnrHvs {
     /// Returns the *unweighted* scores. Depending on whether we output per-frame
     /// or per-video, these will be weighted at different points.
     fn process_frame<T: Pixel>(
-        &mut self,
+        &self,
         frame1: &FrameInfo<T>,
         frame2: &FrameInfo<T>,
-    ) -> Result<Self::FrameResult, Box<dyn Error>> {
+    ) -> Result<(Self::FrameResult, Option<f64>), MetricsError> {
         frame1.can_compare(&frame2)?;
+        let mut cweight = None;
         if self.cweight.is_none() {
-            self.cweight = Some(frame1.chroma_sampling.get_chroma_weight());
+            cweight = Some(frame1.chroma_sampling.get_chroma_weight());
         }
 
         let bit_depth = frame1.bit_depth;
         let y = calculate_plane_psnr_hvs(&frame1.planes[0], &frame2.planes[0], 0, bit_depth);
         let u = calculate_plane_psnr_hvs(&frame1.planes[1], &frame2.planes[1], 1, bit_depth);
         let v = calculate_plane_psnr_hvs(&frame1.planes[2], &frame2.planes[2], 2, bit_depth);
-        Ok(PlanarMetrics {
-            y,
-            u,
-            v,
-            // field not used here
-            avg: 0.,
-        })
+        Ok((
+            PlanarMetrics {
+                y,
+                u,
+                v,
+                // field not used here
+                avg: 0.,
+            },
+            cweight,
+        ))
     }
 
     #[cfg(feature = "decode")]
@@ -97,6 +103,14 @@ impl VideoMetric for PsnrHvs {
             ),
         })
     }
+
+    fn which_method(&self) -> ParallelMethod {
+        ParallelMethod::Others
+    }
+
+    fn set_cweight(&mut self, cweight: f64) {
+        self.cweight = Some(cweight);
+    }
 }
 
 // Normalized inverse quantization matrix for 8x8 DCT at the point of transparency.
diff --git a/av_metrics/src/video/ssim.rs b/av_metrics/src/video/ssim.rs
index 404a2a4..78724fe 100644
--- a/av_metrics/src/video/ssim.rs
+++ b/av_metrics/src/video/ssim.rs
@@ -12,7 +12,9 @@
 use crate::video::decode::Decoder;
 use crate::video::pixel::CastFromPrimitive;
 use crate::video::pixel::Pixel;
+use crate::video::ParallelMethod;
 use crate::video::{FrameInfo, PlanarMetrics, PlaneData, VideoMetric};
+use crate::MetricsError;
 use std::cmp;
 use std::error::Error;
 use std::f64::consts::{E, PI};
@@ -34,8 +36,8 @@ pub fn calculate_frame_ssim<T: Pixel>(
     frame1: &FrameInfo<T>,
     frame2: &FrameInfo<T>,
 ) -> Result<PlanarMetrics, Box<dyn Error>> {
-    let mut processor = Ssim::default();
-    let result = processor.process_frame(frame1, frame2)?;
+    let processor = Ssim::default();
+    let (result, _) = processor.process_frame(frame1, frame2)?;
     let cweight = processor.cweight.unwrap();
     Ok(PlanarMetrics {
         y: log10_convert(result.y, 1.0),
@@ -60,13 +62,14 @@ impl VideoMetric for Ssim {
     /// Returns the *unweighted* scores. Depending on whether we output per-frame
     /// or per-video, these will be weighted at different points.
     fn process_frame<T: Pixel>(
-        &mut self,
+        &self,
         frame1: &FrameInfo<T>,
         frame2: &FrameInfo<T>,
-    ) -> Result<Self::FrameResult, Box<dyn Error>> {
+    ) -> Result<(Self::FrameResult, Option<f64>), MetricsError> {
         frame1.can_compare(&frame2)?;
+        let mut cweight = None;
         if self.cweight.is_none() {
-            self.cweight = Some(frame1.chroma_sampling.get_chroma_weight());
+            cweight = Some(frame1.chroma_sampling.get_chroma_weight());
         }
 
         const KERNEL_SHIFT: usize = 8;
@@ -109,13 +112,16 @@ impl VideoMetric for Ssim {
             &v_kernel,
             &v_kernel,
         );
-        Ok(PlanarMetrics {
-            y,
-            u,
-            v,
-            // Not used here
-            avg: 0.,
-        })
+        Ok((
+            PlanarMetrics {
+                y,
+                u,
+                v,
+                // Not used here
+                avg: 0.,
+            },
+            cweight,
+        ))
     }
 
     #[cfg(feature = "decode")]
@@ -137,6 +143,14 @@ impl VideoMetric for Ssim {
             ),
         })
     }
+
+    fn which_method(&self) -> ParallelMethod {
+        ParallelMethod::Others
+    }
+
+    fn set_cweight(&mut self, cweight: f64) {
+        self.cweight = Some(cweight);
+    }
 }
 
 /// Calculates the MSSSIM score between two videos. Higher is better.
@@ -164,8 +178,8 @@ pub fn calculate_frame_msssim<T: Pixel>(
     frame1: &FrameInfo<T>,
     frame2: &FrameInfo<T>,
 ) -> Result<PlanarMetrics, Box<dyn Error>> {
-    let mut processor = MsSsim::default();
-    let result = processor.process_frame(frame1, frame2)?;
+    let processor = MsSsim::default();
+    let (result, _) = processor.process_frame(frame1, frame2)?;
     let cweight = processor.cweight.unwrap();
     Ok(PlanarMetrics {
         y: log10_convert(result.y, 1.0),
@@ -190,23 +204,27 @@ impl VideoMetric for MsSsim {
     /// Returns the *unweighted* scores. Depending on whether we output per-frame
     /// or per-video, these will be weighted at different points.
     fn process_frame<T: Pixel>(
-        &mut self,
+        &self,
         frame1: &FrameInfo<T>,
         frame2: &FrameInfo<T>,
-    ) -> Result<Self::FrameResult, Box<dyn Error>> {
+    ) -> Result<(Self::FrameResult, Option<f64>), MetricsError> {
         frame1.can_compare(&frame2)?;
+        let mut cweight = None;
         if self.cweight.is_none() {
-            self.cweight = Some(frame1.chroma_sampling.get_chroma_weight());
+            cweight = Some(frame1.chroma_sampling.get_chroma_weight());
         }
 
         let bit_depth = frame1.bit_depth;
-        Ok(PlanarMetrics {
-            y: calculate_plane_msssim(&frame1.planes[0], &frame2.planes[0], bit_depth),
-            u: calculate_plane_msssim(&frame1.planes[1], &frame2.planes[1], bit_depth),
-            v: calculate_plane_msssim(&frame1.planes[2], &frame2.planes[2], bit_depth),
-            // Not used here
-            avg: 0.,
-        })
+        Ok((
+            PlanarMetrics {
+                y: calculate_plane_msssim(&frame1.planes[0], &frame2.planes[0], bit_depth),
+                u: calculate_plane_msssim(&frame1.planes[1], &frame2.planes[1], bit_depth),
+                v: calculate_plane_msssim(&frame1.planes[2], &frame2.planes[2], bit_depth),
+                // Not used here
+                avg: 0.,
+            },
+            cweight,
+        ))
     }
 
     #[cfg(feature = "decode")]
@@ -228,6 +246,14 @@ impl VideoMetric for MsSsim {
             ),
         })
     }
+
+    fn which_method(&self) -> ParallelMethod {
+        ParallelMethod::Others
+    }
+
+    fn set_cweight(&mut self, cweight: f64) {
+        self.cweight = Some(cweight);
+    }
 }
 
 #[derive(Debug, Clone, Copy, Default)]
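
Note (not part of the patch): a minimal standalone sketch of the iter::from_fn + par_bridge() + try_fold/try_reduce pattern that the process_video! macro above relies on. The diff_metric function and the in-memory byte-vector "frames" are made-up stand-ins for Decoder::read_video_frame and VideoMetric::process_frame; only the rayon plumbing mirrors the patch.

// Rust sketch, assuming rayon 1.x in scope.
use rayon::prelude::*;
use std::iter;

// Toy per-"frame" metric standing in for VideoMetric::process_frame.
fn diff_metric(a: &[u8], b: &[u8]) -> Result<f64, String> {
    if a.len() != b.len() {
        return Err("frame size mismatch".to_owned());
    }
    // Mean absolute difference between the two buffers.
    let sum: u64 = a
        .iter()
        .zip(b)
        .map(|(x, y)| (*x as i64 - *y as i64).abs() as u64)
        .sum();
    Ok(sum as f64 / a.len() as f64)
}

fn main() -> Result<(), String> {
    // Two sequential "decoders" that yield frames until exhausted, like
    // iter::from_fn(|| decoder.read_video_frame::<T>().ok()) in the patch.
    let mut src1 = (0u8..10).map(|i| vec![i; 16]).collect::<Vec<_>>().into_iter();
    let mut src2 = (0u8..10).map(|i| vec![i + 1; 16]).collect::<Vec<_>>().into_iter();
    let frame_limit: Option<usize> = None;

    let per_frame: Vec<f64> = iter::from_fn(|| src1.next())
        .zip(iter::from_fn(|| src2.next()))
        .take(frame_limit.unwrap_or(std::usize::MAX))
        // Bridge the sequential reads onto rayon's thread pool.
        .par_bridge()
        // Each worker folds its frames into a local Vec, stopping on the first error.
        .try_fold(
            || vec![],
            |mut acc, (f1, f2)| {
                diff_metric(&f1, &f2).map(|m| {
                    acc.push(m);
                    acc
                })
            },
        )
        // Concatenate the per-worker Vecs into one result.
        .try_reduce(
            || vec![],
            |mut a, b| {
                a.extend_from_slice(&b);
                Ok(a)
            },
        )?;

    println!("avg = {}", per_frame.iter().sum::<f64>() / per_frame.len() as f64);
    Ok(())
}

One consequence of par_bridge() worth noting: the collected per-frame results are not guaranteed to be in input order, which is why the patch only aggregates them with order-insensitive averages (and pulls the chroma weight out of whichever frame happened to report it).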