Skip to content

Commit 4e5b509

Browse files
mmitche and Copilot authored
Compare tar.gz files between VMR and current .NET builds (#47667)
Co-authored-by: Copilot <[email protected]>
1 parent 429ef6c commit 4e5b509

File tree

1 file changed

+200
-21
lines changed
  • src/SourceBuild/content/eng/tools/BuildComparer

1 file changed

+200
-21
lines changed

src/SourceBuild/content/eng/tools/BuildComparer/Program.cs

Lines changed: 200 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
using NuGet.Packaging;
33
using System.Collections.Immutable;
44
using System.CommandLine;
5+
using System.Formats.Tar;
56
using System.IO.Compression;
67
using System.Reflection;
78
using System.Reflection.Metadata;
@@ -44,7 +45,7 @@ static int Main(string[] args)
4445
var parallelismArgument = new CliOption<int>("-parallel")
4546
{
4647
Description = "Amount of parallelism used while analyzing the builds.",
47-
DefaultValueFactory = _ => 16,
48+
DefaultValueFactory = _ => 8,
4849
Required = true
4950
};
5051
var baselineArgument = new CliOption<string>("-baseline")
@@ -345,6 +346,11 @@ private async Task EvaluatePackage(AssetMapping mapping)
345346
static readonly ImmutableArray<string> IncludedAssemblyNameCheckFileExtensions = [".dll", ".exe"];
346347

347348

349+
/// <summary>
350+
/// Evaluate the contents of a mapping between two packages.
351+
/// </summary>
352+
/// <param name="mapping"></param>
353+
/// <returns></returns>
348354
public async Task EvaluatePackageContents(AssetMapping mapping)
349355
{
350356
var diffNugetPackagePath = mapping.DiffFilePath;
@@ -373,31 +379,41 @@ public async Task EvaluatePackageContents(AssetMapping mapping)
373379
}
374380
}
375381

382+
/// <summary>
383+
/// Compare the file lists of packages, identifying missing and extra files.
384+
/// </summary>
385+
/// <param name="mapping"></param>
386+
/// <param name="testPackageReader"></param>
387+
/// <param name="baselinePackageReader"></param>
376388
private async void ComparePackageFileLists(AssetMapping mapping, PackageArchiveReader testPackageReader, PackageArchiveReader baselinePackageReader)
377389
{
378390
IEnumerable<string> baselineFiles = (await baselinePackageReader.GetFilesAsync(CancellationToken.None));
379391
IEnumerable<string> testFiles = (await testPackageReader.GetFilesAsync(CancellationToken.None));
380392

381-
var missingFiles = RemovePackageFilesToIgnore(baselineFiles.Except(testFiles));
393+
// Strip down the baseline and test files to remove version numbers.
394+
var strippedBaselineFiles = baselineFiles.Select(f => RemoveVersionsNormalized(f)).ToList();
395+
var strippedTestFiles = testFiles.Select(f => RemoveVersionsNormalized(f)).ToList();
396+
397+
var missingFiles = RemovePackageFilesToIgnore(strippedBaselineFiles.Except(strippedTestFiles));
382398

383399
foreach (var missingFile in missingFiles)
384400
{
385401
mapping.Issues.Add(new Issue
386402
{
387403
IssueType = IssueType.MissingPackageContent,
388-
Description = $"Package '{mapping.Id}' is missing the following files in the VMR: {string.Join(", ", missingFile)}"
404+
Description = missingFile,
389405
});
390406
}
391407

392408
// Compare the other way, and identify content in the VMR that is not in the baseline
393-
var extraFiles = RemovePackageFilesToIgnore(testFiles.Except(baselineFiles));
409+
var extraFiles = RemovePackageFilesToIgnore(strippedTestFiles.Except(strippedBaselineFiles));
394410

395411
foreach (var extraFile in extraFiles)
396412
{
397413
mapping.Issues.Add(new Issue
398414
{
399415
IssueType = IssueType.ExtraPackageContent,
400-
Description = $"Package '{mapping.Id}' has extra files in the VMR: {string.Join(", ", extraFile)}"
416+
Description = extraFile
401417
});
402418
}
403419

@@ -423,8 +439,8 @@ private static async Task ComparePackageAssemblyVersions(AssetMapping mapping, P
423439
{
424440
try
425441
{
426-
using var baselineStream = await CopyStreamToSeekableStream(baselinePackageReader.GetEntry(fileName).Open());
427-
using var testStream = await CopyStreamToSeekableStream(testPackageReader.GetEntry(fileName).Open());
442+
using var baselineStream = await CopyStreamToSeekableStreamAsync(baselinePackageReader.GetEntry(fileName).Open());
443+
using var testStream = await CopyStreamToSeekableStreamAsync(testPackageReader.GetEntry(fileName).Open());
428444

429445
CompareAssemblyVersions(mapping, fileName, baselineStream, testStream);
430446
}
@@ -435,7 +451,12 @@ private static async Task ComparePackageAssemblyVersions(AssetMapping mapping, P
435451
}
436452
}
437453

438-
private static async Task<Stream> CopyStreamToSeekableStream(Stream stream)
454+
/// <summary>
455+
/// Copies a stream from an archive to a seekable stream (MemoryStream).
456+
/// </summary>
457+
/// <param name="stream"></param>
458+
/// <returns></returns>
459+
private static async Task<Stream> CopyStreamToSeekableStreamAsync(Stream stream)
439460
{
440461
var outputStream = new MemoryStream();
441462
await stream.CopyToAsync(outputStream, CancellationToken.None);
@@ -532,15 +553,164 @@ public async Task EvaluateBlobContents(AssetMapping mapping)
532553
{
533554
// Switch on the file type, and call a helper based on the type
534555

535-
switch (Path.GetExtension(mapping.Id))
556+
if (mapping.Id.EndsWith(".zip"))
536557
{
537-
case ".zip":
538-
await CompareZipArchiveContents(mapping);
539-
break;
540-
default:
541-
return;
558+
await CompareZipArchiveContents(mapping);
559+
}
560+
else if (mapping.Id.EndsWith(".tar.gz") || mapping.Id.EndsWith(".tgz"))
561+
{
562+
await CompareTarArchiveContents(mapping);
542563
}
543564
}
565+
/// <summary>
/// Compares the baseline and diff tar.gz archives of a mapping: first their file lists,
/// then the assembly versions of their entries.
/// </summary>
/// <param name="mapping">Mapping that supplies both archive paths and receives any evaluation errors.</param>
/// <returns>A task that completes when the comparison has finished or has been skipped.</returns>
private async Task CompareTarArchiveContents(AssetMapping mapping)
{
    var diffArchivePath = mapping.DiffFilePath;
    var baselineArchivePath = mapping.BaseBuildFilePath;

    // The comparison needs both archives; bail out when either path is missing.
    if (diffArchivePath is null || baselineArchivePath is null)
    {
        return;
    }

    try
    {
        // Entry names of each archive, baseline first.
        IEnumerable<string> baselineEntries = GetTarGzArchiveFileList(baselineArchivePath);
        IEnumerable<string> diffEntries = GetTarGzArchiveFileList(diffArchivePath);

        CompareBlobArchiveFileLists(mapping, baselineEntries, diffEntries);

        await CompareTarGzAssemblyVersions(mapping, baselineEntries, diffEntries);
    }
    catch (Exception failure)
    {
        // Any failure is recorded on the mapping instead of propagating to the caller.
        mapping.EvaluationErrors.Add(failure.ToString());
    }
}
592+
593+
/// <summary>
/// Reads a gzip-compressed tar archive and collects the name of every entry, in archive order.
/// </summary>
/// <param name="archivePath">Path of the tar.gz file to read.</param>
/// <returns>The entry names exactly as the tar reader reports them.</returns>
private List<string> GetTarGzArchiveFileList(string archivePath)
{
    List<string> names = new();

    using FileStream compressed = File.OpenRead(archivePath);
    using GZipStream decompressed = new GZipStream(compressed, CompressionMode.Decompress);
    using TarReader tar = new TarReader(decompressed);

    // GetNextEntry returns null once the end of the archive has been reached.
    for (TarEntry current = tar.GetNextEntry(); current != null; current = tar.GetNextEntry())
    {
        names.Add(current.Name);
    }

    return names;
}
611+
612+
/// <summary>
/// Compares the assembly versions of the entries that appear in both the baseline and the diff tar.gz archive.
/// </summary>
/// <remarks>
/// Both archives are read front to back in an interleaved fashion. Every entry whose version-stripped
/// name exists in both file lists is copied into memory and kept there until the entry with the same
/// stripped name has been read from the other archive; the pair is then compared and released.
/// When the two archives order their entries differently, many entries may be buffered at once,
/// so this can be memory hungry.
/// </remarks>
/// <param name="mapping">Mapping that supplies the archive paths and receives evaluation errors.</param>
/// <param name="baselineFiles">Entry names of the baseline archive.</param>
/// <param name="diffFiles">Entry names of the diff archive.</param>
/// <returns>A task that completes once both archives have been walked to the end.</returns>
private async Task CompareTarGzAssemblyVersions(AssetMapping mapping, IEnumerable<string> baselineFiles, IEnumerable<string> diffFiles)
{
    // Entries are matched on their version-stripped, separator-normalized names.
    var strippedBaselineFiles = baselineFiles.Select(f => RemoveVersionsNormalized(f)).ToList();
    var strippedDiffFiles = diffFiles.Select(f => RemoveVersionsNormalized(f)).ToList();

    var commonFiles = strippedBaselineFiles.Intersect(strippedDiffFiles).ToHashSet();

    // Buffered copies of common entries that are still waiting for their counterpart.
    var baselineStreams = new Dictionary<string, Stream>();
    var diffStreams = new Dictionary<string, Stream>();

    try
    {
        using FileStream baseStream = File.OpenRead(mapping.BaseBuildFilePath);
        using FileStream diffStream = File.OpenRead(mapping.DiffFilePath);
        using GZipStream baseGzipStream = new GZipStream(baseStream, CompressionMode.Decompress);
        using TarReader baseReader = new TarReader(baseGzipStream);
        using GZipStream diffGzipStream = new GZipStream(diffStream, CompressionMode.Decompress);
        using TarReader diffReader = new TarReader(diffGzipStream);

        string nextBaseEntry = null;
        string nextDiffEntry = null;
        do
        {
            // Advance each archive by one common entry per iteration and compare as soon as
            // both sides of a pair are available.
            nextBaseEntry = await WalkNextCommon(commonFiles, baseReader, baselineStreams);
            if (nextBaseEntry != null)
            {
                CompareAvailableStreams(mapping, baselineStreams, diffStreams, nextBaseEntry);
            }

            nextDiffEntry = await WalkNextCommon(commonFiles, diffReader, diffStreams);
            if (nextDiffEntry != null)
            {
                CompareAvailableStreams(mapping, baselineStreams, diffStreams, nextDiffEntry);
            }
        }
        while (nextBaseEntry != null || nextDiffEntry != null);

        // If there are any remaining streams, create an evaluation error
        if (baselineStreams.Count > 0 || diffStreams.Count > 0)
        {
            mapping.EvaluationErrors.Add("Failed to compare all tar entries.");
        }
    }
    finally
    {
        // Release whatever is still buffered: unmatched entries, or everything that was pending
        // when a comparison threw.
        foreach (Stream leftover in baselineStreams.Values.Concat(diffStreams.Values))
        {
            leftover.Dispose();
        }

        baselineStreams.Clear();
        diffStreams.Clear();
    }

    // Walk the tar to the next entry that exists in both the base and the diff.
    // Returns the stripped name of that entry, or null only when the end of the archive is reached.
    static async Task<string> WalkNextCommon(HashSet<string> commonFiles, TarReader reader, Dictionary<string, Stream> streams)
    {
        TarEntry entry;
        while ((entry = reader.GetNextEntry()) != null)
        {
            // Entries without a data section (for example directories) have nothing to compare.
            // Skip them rather than stopping: a null return means "end of archive" to the caller.
            if (entry.DataStream == null)
            {
                continue;
            }

            string entryStripped = RemoveVersionsNormalized(entry.Name);

            // If the element lives in the common files hash set, then copy it to a memory stream.
            // The copy is kept open until its counterpart shows up.
            if (commonFiles.Contains(entryStripped))
            {
                streams[entryStripped] = await CopyStreamToSeekableStreamAsync(entry.DataStream);
                return entryStripped;
            }
        }

        return null;
    }

    // Given we have a new entry that is common between base and diff, compare the pair once both
    // sides have been buffered, then release both copies.
    static void CompareAvailableStreams(AssetMapping mapping, Dictionary<string, Stream> baselineStreams, Dictionary<string, Stream> diffStreams,
        string entry)
    {
        if (baselineStreams.TryGetValue(entry, out var baselineFileStream) &&
            diffStreams.TryGetValue(entry, out var diffFileStream))
        {
            CompareAssemblyVersions(mapping, entry, baselineFileStream, diffFileStream);
            baselineFileStream.Dispose();
            diffFileStream.Dispose();
            baselineStreams.Remove(entry);
            diffStreams.Remove(entry);
        }
    }
}
701+
702+
/// <summary>
/// Normalizes directory separators in a path to '/' and strips version numbers from it.
/// </summary>
/// <remarks>
/// <c>VersionIdentifier.RemoveVersions</c> is applied repeatedly until a pass leaves the path unchanged.
/// </remarks>
/// <param name="path">The path to normalize.</param>
/// <returns>The path with forward-slash separators and with versions removed.</returns>
private static string RemoveVersionsNormalized(string path)
{
    // Map each backslash to a single forward slash, so that a backslash-separated path normalizes
    // to exactly the same text as its forward-slash equivalent ("a\b" and "a/b" both give "a/b").
    string strippedPath = path.Replace('\\', '/');

    string prevPath;
    do
    {
        prevPath = strippedPath;
        strippedPath = VersionIdentifier.RemoveVersions(strippedPath);
    } while (prevPath != strippedPath);

    return strippedPath;
}
544714

545715
private async Task CompareZipArchiveContents(AssetMapping mapping)
546716
{
@@ -586,8 +756,8 @@ private async Task CompareZipAssemblyVersions(AssetMapping mapping, ZipArchive d
586756
{
587757
try
588758
{
589-
using var baselineStream = await CopyStreamToSeekableStream(baselineArchive.GetEntry(fileName).Open());
590-
using var testStream = await CopyStreamToSeekableStream(diffArchive.GetEntry(fileName).Open());
759+
using var baselineStream = await CopyStreamToSeekableStreamAsync(baselineArchive.GetEntry(fileName).Open());
760+
using var testStream = await CopyStreamToSeekableStreamAsync(diffArchive.GetEntry(fileName).Open());
591761

592762
CompareAssemblyVersions(mapping, fileName, baselineStream, testStream);
593763
}
@@ -600,7 +770,16 @@ private async Task CompareZipAssemblyVersions(AssetMapping mapping, ZipArchive d
600770

601771
private static void CompareAssemblyVersions(AssetMapping mapping, string fileName, Stream baselineStream, Stream testStream)
602772
{
603-
AssemblyName baselineAssemblyName = GetAssemblyName(baselineStream, fileName);
773+
AssemblyName baselineAssemblyName = null;
774+
try
775+
{
776+
baselineAssemblyName = GetAssemblyName(baselineStream, fileName);
777+
}
778+
catch (BadImageFormatException)
779+
{
780+
// Assume the file is not an assembly, and then don't attempt for the test assembly
781+
return;
782+
}
604783
AssemblyName testAssemblyName = GetAssemblyName(testStream, fileName);
605784
if ((baselineAssemblyName == null) != (testAssemblyName == null))
606785
{
@@ -629,16 +808,16 @@ private static void CompareBlobArchiveFileLists(AssetMapping mapping, IEnumerabl
629808
{
630809
// Because these typically contain version numbers in their paths, we need to go and remove those.
631810

632-
var strippedBaselineFiles = baselineFiles.Select(f => VersionIdentifier.RemoveVersions(f)).ToList();
633-
var strippedDiffFiles = diffFiles.Select(f => VersionIdentifier.RemoveVersions(f)).ToList();
811+
var strippedBaselineFiles = baselineFiles.Select(f => RemoveVersionsNormalized(f)).ToList();
812+
var strippedDiffFiles = diffFiles.Select(f => RemoveVersionsNormalized(f)).ToList();
634813

635814
var missingFiles = strippedBaselineFiles.Except(strippedDiffFiles);
636815
foreach (var missingFile in missingFiles)
637816
{
638817
mapping.Issues.Add(new Issue
639818
{
640819
IssueType = IssueType.MissingPackageContent,
641-
Description = $"Blob '{mapping.Id}' is missing the following files in the VMR: {string.Join(", ", missingFile)}"
820+
Description = missingFile
642821
});
643822
}
644823
// Compare the other way, and identify content in the VMR that is not in the baseline
@@ -648,7 +827,7 @@ private static void CompareBlobArchiveFileLists(AssetMapping mapping, IEnumerabl
648827
mapping.Issues.Add(new Issue
649828
{
650829
IssueType = IssueType.ExtraPackageContent,
651-
Description = $"Blob '{mapping.Id}' has extra files in the VMR: {string.Join(", ", extraFile)}"
830+
Description = extraFile
652831
});
653832
}
654833
}

0 commit comments

Comments
 (0)