diff --git a/crates/rspack_plugin_real_content_hash/Cargo.toml b/crates/rspack_plugin_real_content_hash/Cargo.toml index b680bfbe383b..0f6d9501f6f9 100644 --- a/crates/rspack_plugin_real_content_hash/Cargo.toml +++ b/crates/rspack_plugin_real_content_hash/Cargo.toml @@ -32,4 +32,4 @@ workspace = true [lib] doctest = false -test = false +test = true diff --git a/crates/rspack_plugin_real_content_hash/src/lib.rs b/crates/rspack_plugin_real_content_hash/src/lib.rs index 16af56e75c1f..66de1f39d62f 100644 --- a/crates/rspack_plugin_real_content_hash/src/lib.rs +++ b/crates/rspack_plugin_real_content_hash/src/lib.rs @@ -18,7 +18,7 @@ use rspack_core::{ }; use rspack_error::{Result, ToStringResultToRspackResultExt}; use rspack_hash::RspackHash; -use rspack_hook::{plugin, plugin_hook}; +use rspack_hook::{Hook, plugin, plugin_hook}; use rspack_util::fx_hash::FxDashMap; use rustc_hash::{FxHashMap as HashMap, FxHashSet as HashSet, FxHasher}; @@ -133,6 +133,10 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> { let mut hash_to_new_hash = HashMap::default(); let hooks = RealContentHashPlugin::get_compilation_hooks(compilation.id()); + let has_update_hash_hook = { + let hooks = hooks.borrow(); + !hooks.update_hash.used_stages().is_empty() + }; let mut computed_hashes = HashSet::default(); let mut top_task = ordered_hashes_iter.next(); @@ -157,75 +161,138 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> { } } - let batch_source_tasks = batch - .iter() - .filter_map(|hash| { - let assets_names = hash_to_asset_names.get(hash.as_str())?; - let tasks = assets_names + let new_hashes = if has_update_hash_hook { + let batch_source_tasks = batch + .iter() + .filter_map(|hash| { + let assets_names = hash_to_asset_names.get(hash.as_str())?; + let tasks = assets_names + .iter() + .filter_map(|name| { + let data = assets_data.get(name)?; + Some((hash.as_str(), *name, data)) + }) + .collect::>(); + Some(tasks) + }) + .flatten() + .collect::>(); + + let batch_sources = batch_source_tasks + .into_par_iter() + .map(|(hash, name, data)| { + let new_source = + data.compute_new_source(data.own_hashes.contains(hash), &hash_to_new_hash, &hash_ac); + ((hash, name), new_source) + }) + .collect::>(); + + rspack_parallel::scope::<_, Result<_>>(|token| { + batch .iter() - .filter_map(|name| { - let data = assets_data.get(name)?; - Some((hash.as_str(), *name, data)) + .cloned() + .filter_map(|old_hash| { + let asset_names = hash_to_asset_names.remove(old_hash.as_str())?; + Some((old_hash, asset_names)) }) - .collect::>(); - Some(tasks) - }) - .flatten() - .collect::>(); - - let batch_sources = batch_source_tasks - .into_par_iter() - .map(|(hash, name, data)| { - let new_source = - data.compute_new_source(data.own_hashes.contains(hash), &hash_to_new_hash, &hash_ac); - ((hash, name), new_source) + .for_each(|(old_hash, asset_names)| { + let s = + unsafe { token.used((&hooks, &compilation, &batch_sources, old_hash, asset_names)) }; + s.spawn( + |(hooks, compilation, batch_sources, old_hash, mut asset_names)| async move { + asset_names.sort_unstable(); + let mut asset_contents = asset_names + .iter() + .filter_map(|name| batch_sources.get(&(old_hash.as_str(), name))) + .cloned() + .collect::>(); + asset_contents.dedup(); + let updated_hash = hooks + .borrow() + .update_hash + .call(compilation, &asset_contents, &old_hash) + .await?; + + let new_hash = if let Some(new_hash) = updated_hash { + new_hash + } else { + let mut hasher = RspackHash::from(&compilation.options.output); + for asset_content in asset_contents { + hasher.write(&asset_content.buffer()); + } + let new_hash = hasher.digest(&compilation.options.output.hash_digest); + + new_hash.rendered(old_hash.len()).to_string() + }; + + Ok((old_hash.clone(), new_hash)) + }, + ); + }); }) - .collect::>(); - - let new_hashes = rspack_parallel::scope::<_, Result<_>>(|token| { - batch - .iter() - .cloned() - .filter_map(|old_hash| { - let asset_names = hash_to_asset_names.remove(old_hash.as_str())?; - Some((old_hash, asset_names)) - }) - .for_each(|(old_hash, asset_names)| { - let s = - unsafe { token.used((&hooks, &compilation, &batch_sources, old_hash, asset_names)) }; - s.spawn( - |(hooks, compilation, batch_sources, old_hash, mut asset_names)| async move { - asset_names.sort_unstable(); - let mut asset_contents = asset_names - .iter() - .filter_map(|name| batch_sources.get(&(old_hash.as_str(), name))) - .cloned() - .collect::>(); - asset_contents.dedup(); - let updated_hash = hooks - .borrow() - .update_hash - .call(compilation, &asset_contents, &old_hash) - .await?; - - let new_hash = if let Some(new_hash) = updated_hash { - new_hash - } else { + .await + } else { + rspack_parallel::scope::<_, Result<_>>(|token| { + batch + .iter() + .cloned() + .filter_map(|old_hash| { + let asset_names = hash_to_asset_names.remove(old_hash.as_str())?; + Some((old_hash, asset_names)) + }) + .for_each(|(old_hash, asset_names)| { + let s = unsafe { + token.used(( + &compilation, + &assets_data, + &hash_to_new_hash, + &hash_ac, + old_hash, + asset_names, + )) + }; + s.spawn( + |( + compilation, + assets_data, + hash_to_new_hash, + hash_ac, + old_hash, + mut asset_names, + )| async move { + asset_names.sort_unstable(); let mut hasher = RspackHash::from(&compilation.options.output); - for asset_content in asset_contents { - hasher.write(&asset_content.buffer()); + let mut previous_source: Option = None; + + for name in asset_names { + let data = assets_data + .get(name) + .expect("RealContentHashPlugin: should have asset data"); + let source = data.compute_new_source( + data.own_hashes.contains(old_hash.as_str()), + hash_to_new_hash, + hash_ac, + ); + + if previous_source.as_ref() == Some(&source) { + continue; + } + + hasher.write(&source.buffer()); + previous_source = Some(source); } - let new_hash = hasher.digest(&compilation.options.output.hash_digest); - - new_hash.rendered(old_hash.len()).to_string() - }; - Ok((old_hash.clone(), new_hash)) - }, - ); - }); - }) - .await + let new_hash = hasher.digest(&compilation.options.output.hash_digest); + Ok(( + old_hash.clone(), + new_hash.rendered(old_hash.len()).to_string(), + )) + }, + ); + }); + }) + .await + } .into_iter() .map(|r| r.to_rspack_result()) .collect::>>()?; @@ -244,7 +311,7 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> { let updates: Vec<_> = assets_data .into_par_iter() .filter_map(|(name, data)| { - let new_source = data.compute_new_source(false, &hash_to_new_hash, &hash_ac); + let source_changed = data.needs_source_update(false, &hash_to_new_hash); let mut new_name = String::with_capacity(name.len()); hash_ac.replace_all_with(name, &mut new_name, |_, hash, dst| { let replace_to = hash_to_new_hash @@ -254,29 +321,32 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> { true }); let new_name = (name != new_name).then_some(new_name); - Some((name.to_owned(), new_source, new_name)) + let new_hashes = data.updated_content_hashes(&hash_to_new_hash); + let hashes_changed = new_hashes != data.content_hashes; + + if !source_changed && !hashes_changed && new_name.is_none() { + return None; + } + + let new_source = if source_changed { + data.compute_new_source(false, &hash_to_new_hash, &hash_ac) + } else { + data.old_source.clone() + }; + + Some((name.to_owned(), new_source, new_name, new_hashes)) }) .collect(); logger.time_end(start); let start = logger.time("update assets"); let mut asset_renames = Vec::with_capacity(updates.len()); - for (name, new_source, new_name) in updates { + for (name, new_source, new_name, new_hashes) in updates { compilation.update_asset(&name, |_, old_info| { - let new_hashes: HashSet<_> = old_info - .content_hash - .iter() - .map(|old_hash| { - hash_to_new_hash - .get(old_hash.as_str()) - .expect("should have new hash") - .to_owned() - }) - .collect(); let info_update = (*old_info).clone(); Ok(( new_source.clone(), - BindingCell::from(info_update.with_content_hashes(new_hashes)), + BindingCell::from(info_update.with_content_hashes(new_hashes.clone())), )) })?; if let Some(new_name) = new_name { @@ -293,6 +363,7 @@ async fn inner_impl(compilation: &mut Compilation) -> Result<()> { #[derive(Debug)] struct AssetData { + content_hashes: HashSet, own_hashes: HashSet, referenced_hashes: HashSet, #[debug(skip)] @@ -307,7 +378,7 @@ struct AssetData { #[derive(Debug)] enum AssetDataContent { - Buffer, + Opaque, String(String), } @@ -324,12 +395,17 @@ impl AssetData { } referenced_hashes.insert(hash.to_string()); } - AssetDataContent::String(content.into_owned()) + if own_hashes.is_empty() && referenced_hashes.is_empty() { + AssetDataContent::Opaque + } else { + AssetDataContent::String(content.into_owned()) + } } else { - AssetDataContent::Buffer + AssetDataContent::Opaque }; Self { + content_hashes: info.content_hash.clone(), own_hashes, referenced_hashes, old_source: source, @@ -339,25 +415,56 @@ impl AssetData { } } + fn needs_source_update( + &self, + without_own: bool, + hash_to_new_hash: &HashMap, + ) -> bool { + if without_own && !self.own_hashes.is_empty() { + return true; + } + + self + .own_hashes + .iter() + .chain(self.referenced_hashes.iter()) + .any(|hash| { + hash_to_new_hash + .get(hash.as_str()) + .is_some_and(|new_hash| new_hash != hash) + }) + } + + fn updated_content_hashes(&self, hash_to_new_hash: &HashMap) -> HashSet { + self + .content_hashes + .iter() + .map(|old_hash| { + hash_to_new_hash + .get(old_hash.as_str()) + .expect("RealContentHashPlugin: should have new hash") + .to_owned() + }) + .collect() + } + pub fn compute_new_source( &self, without_own: bool, hash_to_new_hash: &HashMap, hash_ac: &AhoCorasick, ) -> BoxSource { + if !self.needs_source_update(without_own, hash_to_new_hash) { + return self.old_source.clone(); + } + (if without_own { &self.new_source_without_own } else { &self.new_source }) .get_or_init(|| { - if let AssetDataContent::String(content) = &self.content - && (!self.own_hashes.is_empty() - || self - .referenced_hashes - .iter() - .any(|hash| matches!(hash_to_new_hash.get(hash.as_str()), Some(h) if h != hash))) - { + if let AssetDataContent::String(content) = &self.content { let mut new_content = String::with_capacity(content.len()); hash_ac.replace_all_with(content, &mut new_content, |_, hash, dst| { let replace_to = if without_own && self.own_hashes.contains(hash) { @@ -469,3 +576,67 @@ impl OrderedHashesBuilder<'_> { stack.remove(hash); } } + +#[cfg(test)] +mod tests { + use rspack_core::rspack_sources::{RawStringSource, SourceExt}; + + use super::*; + + fn build_hash_ac(hashes: &[&str]) -> AhoCorasick { + AhoCorasick::builder() + .match_kind(MatchKind::LeftmostLongest) + .build(hashes.iter().map(|hash| hash.as_bytes())) + .expect("should build hash matcher") + } + + #[test] + fn string_assets_without_hash_matches_do_not_store_owned_content() { + let source = RawStringSource::from("console.log('rspack');".to_string()).boxed(); + let hash_ac = build_hash_ac(&["deadbeef"]); + + let data = AssetData::new(source, &AssetInfo::default(), &hash_ac); + + assert!(matches!(data.content, AssetDataContent::Opaque)); + assert!(data.own_hashes.is_empty()); + assert!(data.referenced_hashes.is_empty()); + } + + #[test] + fn unchanged_own_hash_does_not_prepare_final_source() { + let old_hash = "deadbeef"; + let content = format!("asset-{old_hash}.js"); + let source = RawStringSource::from(content.clone()).boxed(); + let hash_ac = build_hash_ac(&[old_hash]); + let mut info = AssetInfo::default(); + info.set_content_hash(old_hash.to_string()); + + let data = AssetData::new(source, &info, &hash_ac); + let hash_to_new_hash = HashMap::from_iter([(old_hash.to_string(), old_hash.to_string())]); + + assert!(!data.needs_source_update(false, &hash_to_new_hash)); + + let rendered = data.compute_new_source(false, &hash_to_new_hash, &hash_ac); + + assert_eq!(rendered.source().into_string_lossy(), content); + assert!(data.new_source.get().is_none()); + } + + #[test] + fn temporary_source_still_strips_own_hash_when_hash_is_unchanged() { + let old_hash = "deadbeef"; + let source = RawStringSource::from(format!("asset-{old_hash}.js")).boxed(); + let hash_ac = build_hash_ac(&[old_hash]); + let mut info = AssetInfo::default(); + info.set_content_hash(old_hash.to_string()); + + let data = AssetData::new(source, &info, &hash_ac); + let hash_to_new_hash = HashMap::from_iter([(old_hash.to_string(), old_hash.to_string())]); + + assert!(data.needs_source_update(true, &hash_to_new_hash)); + + let rendered = data.compute_new_source(true, &hash_to_new_hash, &hash_ac); + + assert_eq!(rendered.source().into_string_lossy(), "asset-.js"); + } +}