From 8d2684ba57a3ceaa3091bf03c5ac7b93c52e9821 Mon Sep 17 00:00:00 2001
From: Tamas Vajk
Date: Mon, 17 Jun 2024 13:59:35 +0200
Subject: [PATCH] C#: Adjust trap location, database ID and archiving of generated sources

---
 .../Semmle.Extraction.CSharp/Entities/File.cs | 11 +++-
 .../Extractor/Analyser.cs                     |  3 +-
 .../Extractor/BinaryLogAnalyser.cs            | 12 +++-
 .../Extractor/Extractor.cs                    | 12 +++-
 .../Semmle.Extraction/Entities/File.cs        |  3 +-
 .../Extractor/ExtractionContext.cs            | 63 ++++++++++++++++
 .../extractor/Semmle.Extraction/TrapWriter.cs | 78 +++++++++++---------
 .../all-platforms/binlog/Files.expected       |  2 +-
 8 files changed, 141 insertions(+), 43 deletions(-)

diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Entities/File.cs b/csharp/extractor/Semmle.Extraction.CSharp/Entities/File.cs
index e0940191a3141..5f1704e110ea8 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Entities/File.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Entities/File.cs
@@ -34,7 +34,14 @@ public override void Populate(TextWriter trapFile)
                             lineCounts.Total++;
 
                         trapFile.numlines(this, lineCounts);
-                        Context.TrapWriter.Archive(originalPath, TransformedPath, text.Encoding ?? System.Text.Encoding.Default);
+                        if (BinaryLogExtractionContext.GetAdjustedPath(Context.ExtractionContext, originalPath) is not null)
+                        {
+                            Context.TrapWriter.ArchiveContent(rawText, TransformedPath);
+                        }
+                        else
+                        {
+                            Context.TrapWriter.Archive(originalPath, TransformedPath, text.Encoding ?? System.Text.Encoding.Default);
+                        }
                     }
                 }
                 else if (IsPossiblyTextFile())
@@ -70,7 +77,7 @@ private bool IsPossiblyTextFile()
             return !extension.Equals("dll") && !extension.Equals("exe");
         }
 
-        public static File Create(Context cx, string path) => FileFactory.Instance.CreateEntity(cx, (typeof(File), path), path);
+        public static File Create(Context cx, string path) => FileFactory.Instance.CreateEntity(cx, (typeof(File), path), path); // TODO: we should figure out if the path belongs to a source generated tree. If so, we should modify the path to point to a unique file location
 
         private class FileFactory : CachedEntityFactory<string, File>
         {
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs
index a57c269128eca..47229c57b2066 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Analyser.cs
@@ -185,7 +185,8 @@ private void DoExtractTree(SyntaxTree tree)
             {
                 var stopwatch = new Stopwatch();
                 stopwatch.Start();
-                var sourcePath = tree.FilePath;
+                var sourcePath = BinaryLogExtractionContext.GetAdjustedPath(ExtractionContext, tree.FilePath) ?? tree.FilePath;
+
                 var transformedSourcePath = PathTransformer.Transform(sourcePath);
 
                 var trapPath = transformedSourcePath.GetTrapPath(Logger, options.TrapCompression);
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/BinaryLogAnalyser.cs b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/BinaryLogAnalyser.cs
index c59966072db35..6026778f2f7e5 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/BinaryLogAnalyser.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/BinaryLogAnalyser.cs
@@ -1,3 +1,4 @@
+using System.Collections.Generic;
 using Microsoft.CodeAnalysis.CSharp;
 using Semmle.Util;
 using Semmle.Util.Logging;
@@ -11,10 +12,15 @@ public BinaryLogAnalyser(IProgressMonitor pm, ILogger logger, PathTransformer pa
         {
         }
 
-        public void Initialize(string cwd, string[] args, string outputPath, CSharpCompilation compilationIn, CommonOptions options)
+        public void Initialize(
+            string cwd, string[] args, string outputPath, CSharpCompilation compilation,
+            IEnumerable<Microsoft.CodeAnalysis.SyntaxTree> generatedSyntaxTrees,
+            string compilationIdentifier, CommonOptions options)
         {
-            compilation = compilationIn;
-            ExtractionContext = new ExtractionContext(cwd, args, outputPath, [], Logger, PathTransformer, ExtractorMode.BinaryLog, options.QlTest);
+            base.compilation = compilation;
+            ExtractionContext = new BinaryLogExtractionContext(
+                cwd, args, outputPath, generatedSyntaxTrees, compilationIdentifier,
+                Logger, PathTransformer, options.QlTest);
             this.options = options;
             LogExtractorInfo();
             SetReferencePaths();
diff --git a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs
index a05a2d1ffb20e..e82ce39aa8a7a 100644
--- a/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs
+++ b/csharp/extractor/Semmle.Extraction.CSharp/Extractor/Extractor.cs
@@ -159,13 +159,23 @@ static bool filter(CompilerCall compilerCall)
                     var compilerArgs = compilerCall.GetArguments();
                     var args = reader.ReadCommandLineArguments(compilerCall);
 
+                    // Generated syntax trees are always added to the end of the list of syntax trees.
+                    var generatedSyntaxTrees = compilation.SyntaxTrees.Skip(compilationData.Compilation.SyntaxTrees.Count());
+
                     using var analyser = new BinaryLogAnalyser(new LogProgressMonitor(logger), logger, pathTransformer, canonicalPathCache, options.AssemblySensitiveTrap);
 
                     var exit = Analyse(stopwatch, analyser, options,
                         references => [() => compilation.References.ForEach(r => references.Add(r))],
                         (analyser, syntaxTrees) => [() => syntaxTrees.AddRange(compilation.SyntaxTrees)],
                         (syntaxTrees, references) => compilation,
-                        (compilation, options) => analyser.Initialize(compilerCall.ProjectDirectory, compilerArgs?.ToArray() ?? [], TracingAnalyser.GetOutputName(compilation, args), compilation, options),
+                        (compilation, options) => analyser.Initialize(
+                            compilerCall.ProjectDirectory,
+                            compilerArgs?.ToArray() ?? [],
+                            TracingAnalyser.GetOutputName(compilation, args),
+                            compilation,
+                            generatedSyntaxTrees,
+                            diagnosticName,
+                            options),
                         () => { });
 
                     switch (exit)
diff --git a/csharp/extractor/Semmle.Extraction/Entities/File.cs b/csharp/extractor/Semmle.Extraction/Entities/File.cs
index b703362feb8eb..dda965920ae14 100644
--- a/csharp/extractor/Semmle.Extraction/Entities/File.cs
+++ b/csharp/extractor/Semmle.Extraction/Entities/File.cs
@@ -8,7 +8,8 @@ protected File(Context cx, string path)
             : base(cx, path)
         {
             originalPath = path;
-            transformedPathLazy = new Lazy<PathTransformer.ITransformedPath>(() => Context.ExtractionContext.PathTransformer.Transform(originalPath));
+            var adjustedPath = BinaryLogExtractionContext.GetAdjustedPath(Context.ExtractionContext, originalPath) ?? path;
+            transformedPathLazy = new Lazy<PathTransformer.ITransformedPath>(() => Context.ExtractionContext.PathTransformer.Transform(adjustedPath));
         }
 
         protected readonly string originalPath;
diff --git a/csharp/extractor/Semmle.Extraction/Extractor/ExtractionContext.cs b/csharp/extractor/Semmle.Extraction/Extractor/ExtractionContext.cs
index 3628e4bc80eaf..cfc8ea6a54817 100644
--- a/csharp/extractor/Semmle.Extraction/Extractor/ExtractionContext.cs
+++ b/csharp/extractor/Semmle.Extraction/Extractor/ExtractionContext.cs
@@ -1,9 +1,72 @@
 using System.Collections.Generic;
+using System.IO;
+using System.Linq;
+using Microsoft.CodeAnalysis;
 using Semmle.Util.Logging;
 using CompilationInfo = (string key, string value);
 
 namespace Semmle.Extraction
 {
+    /// <summary>
+    /// Extraction context for binary log based extraction. Paths of source generated
+    /// files are relocated into a per-compilation folder, see <see cref="GetAdjustedPath(ExtractionContext, string)"/>.
+    /// </summary>
+    public class BinaryLogExtractionContext : ExtractionContext
+    {
+        // File paths of the generated syntax trees, materialized once for O(1) lookups.
+        private readonly HashSet<string> generatedFilePaths;
+        private readonly string compilationIdentifier;
+        private readonly string generatedFolderName;
+
+        public BinaryLogExtractionContext(string cwd, string[] args, string outputPath,
+            IEnumerable<SyntaxTree> generatedSyntaxTrees, string compilationIdentifier,
+            ILogger logger, PathTransformer pathTransformer, bool isQlTest)
+            : base(cwd, args, outputPath, [], logger, pathTransformer, ExtractorMode.BinaryLog, isQlTest)
+        {
+            generatedFilePaths = generatedSyntaxTrees.Select(t => t.FilePath).ToHashSet();
+            this.compilationIdentifier = compilationIdentifier;
+
+            // Compute a unique folder name for the generated files:
+            generatedFolderName = "generated";
+
+            if (Directory.Exists(generatedFolderName))
+            {
+                var counter = 0;
+                do
+                {
+                    generatedFolderName = $"generated{counter++}";
+                }
+                while (Directory.Exists(generatedFolderName));
+            }
+        }
+
+        private string? GetAdjustedPath(string path)
+        {
+            if (!generatedFilePaths.Contains(path))
+            {
+                return null;
+            }
+
+            return Path.Join(generatedFolderName, compilationIdentifier, path);
+        }
+
+        /// <summary>
+        /// Gets the relocated path of <paramref name="sourcePath"/> if it belongs to a source
+        /// generated syntax tree of a binary log extraction, or <c>null</c> otherwise.
+        /// </summary>
+        public static string? GetAdjustedPath(ExtractionContext extractionContext, string sourcePath)
+        {
+            if (extractionContext.Mode.HasFlag(ExtractorMode.BinaryLog)
+                && extractionContext is BinaryLogExtractionContext binaryLogExtractionContext
+                && binaryLogExtractionContext.GetAdjustedPath(sourcePath) is string adjustedPath)
+            {
+                return adjustedPath;
+            }
+
+            return null;
+        }
+    }
+
     /// <summary>
     /// Implementation of the main extractor state.
     /// </summary>
diff --git a/csharp/extractor/Semmle.Extraction/TrapWriter.cs b/csharp/extractor/Semmle.Extraction/TrapWriter.cs
index 84e8a7e64402f..7aedba9bd2c06 100644
--- a/csharp/extractor/Semmle.Extraction/TrapWriter.cs
+++ b/csharp/extractor/Semmle.Extraction/TrapWriter.cs
@@ -105,14 +105,55 @@ public TrapWriter(ILogger logger, PathTransformer.ITransformedPath outputfile, s
         /// <param name="transformedPath">The transformed path to the input file.</param>
         /// <param name="inputEncoding">The encoding used by the input file.</param>
         public void Archive(string originalPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding)
+        {
+            Archive(() =>
+            {
+                var fullInputPath = Path.GetFullPath(originalPath);
+                return File.ReadAllText(fullInputPath, inputEncoding);
+            }, transformedPath);
+        }
+
+        /// <summary>
+        /// Archives the specified text to the normal area of the source archive.
+        /// </summary>
+        /// <param name="contents">The text to archive.</param>
+        /// <param name="transformedPath">The transformed path of the archived file.</param>
+        public void ArchiveContent(string contents, PathTransformer.ITransformedPath transformedPath)
+        {
+            Archive(() => contents, transformedPath);
+        }
+
+        /// <summary>
+        /// Writes the text returned by <paramref name="getContent"/> to the source archive location
+        /// derived from <paramref name="transformedPath"/>. Does nothing if no archive is set; failures
+        /// are logged as warnings and not rethrown.
+        /// </summary>
+        private void Archive(Func<string> getContent, PathTransformer.ITransformedPath transformedPath)
         {
             if (string.IsNullOrEmpty(archive))
+            {
                 return;
+            }
 
-            // Calling GetFullPath makes this use the canonical capitalisation, if the file exists.
-            var fullInputPath = Path.GetFullPath(originalPath);
+            var dest = FileUtils.NestPaths(logger, archive, transformedPath.Value);
+            try
+            {
+                // Obtain the content before creating the temporary file, so that a failing read
+                // (e.g. a missing input file) does not leave an orphaned temporary file behind.
+                var contents = getContent();
+                var tmpSrcFile = Path.GetTempFileName();
+                File.WriteAllText(tmpSrcFile, contents, utf8);
 
-            ArchivePath(fullInputPath, transformedPath, inputEncoding);
+                FileUtils.MoveOrReplace(tmpSrcFile, dest);
+            }
+            catch (Exception ex)
+            {
+                // If this happened, it was probably because
+                // - the same file was compiled multiple times, or
+                // - the file doesn't exist (due to wrong #line directive or because it's an in-memory source generated AST).
+                // In any case, this is not a fatal error.
+                logger.LogWarning("Problem archiving " + dest + ": " + ex);
+            }
         }
 
         /// <summary>
@@ -185,37 +226,6 @@ public void Emit(ITrapEmitter emitter)
             emitter.EmitTrap(Writer);
         }
 
-        /// <summary>
-        /// Attempts to archive the specified input file to the normal area of the source archive.
-        /// The file's path must be sufficiently short so as to render the path of its copy in the
-        /// source archive less than the system path limit of 260 characters.
-        /// </summary>
-        /// <param name="fullInputPath">The full path to the input file.</param>
-        /// <param name="transformedPath">The transformed path to the input file.</param>
-        /// <param name="inputEncoding">The encoding used by the input file.</param>
-        /// <exception cref="PathTooLongException">If the output path in the source archive would
-        /// exceed the system path limit of 260 characters.</exception>
-        private void ArchivePath(string fullInputPath, PathTransformer.ITransformedPath transformedPath, Encoding inputEncoding)
-        {
-            var dest = FileUtils.NestPaths(logger, archive, transformedPath.Value);
-            try
-            {
-                var contents = File.ReadAllText(fullInputPath, inputEncoding);
-                var tmpSrcFile = Path.GetTempFileName();
-                File.WriteAllText(tmpSrcFile, contents, utf8);
-
-                FileUtils.MoveOrReplace(tmpSrcFile, dest);
-            }
-            catch (Exception ex)
-            {
-                // If this happened, it was probably because
-                // - the same file was compiled multiple times, or
-                // - the file doesn't exist (due to wrong #line directive or because it's an in-memory source generated AST).
-                // In any case, this is not a fatal error.
-                logger.LogWarning("Problem archiving " + dest + ": " + ex);
-            }
-        }
-
         private static string TrapExtension(CompressionMode compression)
         {
             switch (compression)
diff --git a/csharp/ql/integration-tests/all-platforms/binlog/Files.expected b/csharp/ql/integration-tests/all-platforms/binlog/Files.expected
index 03e3212cb773e..7c795989ba8ed 100644
--- a/csharp/ql/integration-tests/all-platforms/binlog/Files.expected
+++ b/csharp/ql/integration-tests/all-platforms/binlog/Files.expected
@@ -1,5 +1,5 @@
 | Program.cs:0:0:0:0 | Program.cs |
-| System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs:0:0:0:0 | System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs |
+| generated/test.csproj (net8.0)/System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs:0:0:0:0 | generated/test.csproj (net8.0)/System.Text.RegularExpressions.Generator/System.Text.RegularExpressions.Generator.RegexGenerator/RegexGenerator.g.cs |
 | obj/Debug/net8.0/.NETCoreApp,Version=v8.0.AssemblyAttributes.cs:0:0:0:0 | obj/Debug/net8.0/.NETCoreApp,Version=v8.0.AssemblyAttributes.cs |
 | obj/Debug/net8.0/test.AssemblyInfo.cs:0:0:0:0 | obj/Debug/net8.0/test.AssemblyInfo.cs |
 | obj/Debug/net8.0/test.GlobalUsings.g.cs:0:0:0:0 | obj/Debug/net8.0/test.GlobalUsings.g.cs |