Skip to content

Commit f3f3e1e

Browse files
authored
Create LinuxApplicationLayerDetector (#1551)
* Create `LinuxApplicationLayerDetector`
* Code formatting
* Include npm and pip detectors in experiment
* Increment LinuxContainerDetector version
* Add support for filtering by component type
* Fix LinuxContainerDetectorTests
1 parent e7e4a99 commit f3f3e1e

File tree

10 files changed

+947
-167
lines changed

10 files changed

+947
-167
lines changed

.editorconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -463,6 +463,9 @@ dotnet_diagnostic.SA1623.severity = none
463463
# https://github.com/DotNetAnalyzers/StyleCopAnalyzers
464464
##########################################
465465

466+
dotnet_diagnostic.SA1009.severity = none
467+
dotnet_diagnostic.SA1111.severity = none
468+
466469
# https://github.com/DotNetAnalyzers/StyleCopAnalyzers/blob/master/documentation/SA1600.md
467470
# Elements should be documented
468471
dotnet_diagnostic.SA1600.severity = suggestion

src/Microsoft.ComponentDetection.Detectors/linux/ILinuxScanner.cs

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ namespace Microsoft.ComponentDetection.Detectors.Linux;
55
using System.Threading;
66
using System.Threading.Tasks;
77
using Microsoft.ComponentDetection.Contracts.BcdeModels;
8+
using Microsoft.ComponentDetection.Contracts.TypedComponent;
89

910
/// <summary>
1011
/// Interface for scanning Linux container layers to identify components.
@@ -17,7 +18,14 @@ public interface ILinuxScanner
1718
/// <param name="imageHash">The hash identifier of the container image to scan.</param>
1819
/// <param name="containerLayers">The collection of Docker layers that make up the container image.</param>
1920
/// <param name="baseImageLayerCount">The number of layers that belong to the base image, used to distinguish base image layers from application layers.</param>
21+
/// <param name="enabledComponentTypes">The set of component types to include in the scan results. Only components matching these types will be returned.</param>
2022
/// <param name="cancellationToken">A token to monitor for cancellation requests. The default value is <see cref="CancellationToken.None"/>.</param>
2123
/// <returns>A task that represents the asynchronous operation. The task result contains a collection of <see cref="LayerMappedLinuxComponents"/> representing the components found in the image and their associated layers.</returns>
22-
public Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string imageHash, IEnumerable<DockerLayer> containerLayers, int baseImageLayerCount, CancellationToken cancellationToken = default);
24+
public Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(
25+
string imageHash,
26+
IEnumerable<DockerLayer> containerLayers,
27+
int baseImageLayerCount,
28+
ISet<ComponentType> enabledComponentTypes,
29+
CancellationToken cancellationToken = default
30+
);
2331
}
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
#nullable disable
2+
namespace Microsoft.ComponentDetection.Detectors.Linux;
3+
4+
using System;
5+
using System.Collections.Generic;
6+
using Microsoft.ComponentDetection.Contracts;
7+
using Microsoft.ComponentDetection.Contracts.TypedComponent;
8+
using Microsoft.Extensions.Logging;
9+
10+
/// <summary>
/// Experimental Linux container detector that reports application-level packages (npm and pip)
/// alongside system packages. It runs as an experiment so that its output can be compared against
/// the base Linux detector, which only reports system packages.
/// </summary>
/// <param name="linuxScanner">The Linux scanner service, forwarded to the base detector.</param>
/// <param name="dockerService">The Docker service, forwarded to the base detector.</param>
/// <param name="logger">The logger, forwarded to the base detector.</param>
public class LinuxApplicationLayerDetector(
    ILinuxScanner linuxScanner,
    IDockerService dockerService,
    ILogger<LinuxApplicationLayerDetector> logger
) : LinuxContainerDetector(linuxScanner, dockerService, logger), IExperimentalDetector
{
    /// <summary>
    /// Gets the identifier of this detector. Declared with <c>new</c>, so it hides the base member.
    /// </summary>
    public new string Id
    {
        get { return "LinuxApplicationLayer"; }
    }

    /// <summary>
    /// Gets the detector categories: the names of the Linux, Npm and Pip detector classes.
    /// Declared with <c>new</c>, so it hides the base member.
    /// </summary>
    public new IEnumerable<string> Categories
    {
        get
        {
            var linuxCategory = Enum.GetName(typeof(DetectorClass), DetectorClass.Linux);
            var npmCategory = Enum.GetName(typeof(DetectorClass), DetectorClass.Npm);
            var pipCategory = Enum.GetName(typeof(DetectorClass), DetectorClass.Pip);

            return [linuxCategory, npmCategory, pipCategory];
        }
    }

    /// <summary>
    /// Gets the component types this detector can report: Linux, Npm and Pip.
    /// Declared with <c>new</c>, so it hides the base member.
    /// </summary>
    public new IEnumerable<ComponentType> SupportedComponentTypes
    {
        get { return [ComponentType.Linux, ComponentType.Npm, ComponentType.Pip]; }
    }

    /// <summary>
    /// Overrides the base detector's selection of enabled component types.
    /// </summary>
    /// <returns>A new set containing the Linux, Npm and Pip component types.</returns>
    protected override ISet<ComponentType> GetEnabledComponentTypes()
    {
        var enabledTypes = new HashSet<ComponentType>();
        enabledTypes.Add(ComponentType.Linux);
        enabledTypes.Add(ComponentType.Npm);
        enabledTypes.Add(ComponentType.Pip);
        return enabledTypes;
    }
}

src/Microsoft.ComponentDetection.Detectors/linux/LinuxContainerDetector.cs

Lines changed: 163 additions & 81 deletions
Large diffs are not rendered by default.

src/Microsoft.ComponentDetection.Detectors/linux/LinuxScanner.cs

Lines changed: 79 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,8 @@ namespace Microsoft.ComponentDetection.Detectors.Linux;
2121
/// </summary>
2222
public class LinuxScanner : ILinuxScanner
2323
{
24-
private const string ScannerImage = "governancecontainerregistry.azurecr.io/syft:v1.37.0@sha256:48d679480c6d272c1801cf30460556959c01d4826795be31d4fd8b53750b7d91";
24+
private const string ScannerImage =
25+
"governancecontainerregistry.azurecr.io/syft:v1.37.0@sha256:48d679480c6d272c1801cf30460556959c01d4826795be31d4fd8b53750b7d91";
2526

2627
private static readonly IList<string> CmdParameters =
2728
[
@@ -34,13 +35,19 @@ public class LinuxScanner : ILinuxScanner
3435

3536
private static readonly SemaphoreSlim ContainerSemaphore = new SemaphoreSlim(2);
3637

37-
private static readonly int SemaphoreTimeout = Convert.ToInt32(TimeSpan.FromHours(1).TotalMilliseconds);
38+
private static readonly int SemaphoreTimeout = Convert.ToInt32(
39+
TimeSpan.FromHours(1).TotalMilliseconds
40+
);
3841

3942
private readonly IDockerService dockerService;
4043
private readonly ILogger<LinuxScanner> logger;
4144
private readonly IEnumerable<IArtifactComponentFactory> componentFactories;
4245
private readonly IEnumerable<IArtifactFilter> artifactFilters;
4346
private readonly Dictionary<string, IArtifactComponentFactory> factoryLookup;
47+
private readonly Dictionary<
48+
ComponentType,
49+
IArtifactComponentFactory
50+
> componentTypeToFactoryLookup;
4451

4552
/// <summary>
4653
/// Initializes a new instance of the <see cref="LinuxScanner"/> class.
@@ -53,7 +60,8 @@ public LinuxScanner(
5360
IDockerService dockerService,
5461
ILogger<LinuxScanner> logger,
5562
IEnumerable<IArtifactComponentFactory> componentFactories,
56-
IEnumerable<IArtifactFilter> artifactFilters)
63+
IEnumerable<IArtifactFilter> artifactFilters
64+
)
5765
{
5866
this.dockerService = dockerService;
5967
this.logger = logger;
@@ -69,10 +77,27 @@ public LinuxScanner(
6977
this.factoryLookup[artifactType] = factory;
7078
}
7179
}
80+
81+
// Build a lookup dictionary for component type to factory mapping
82+
this.componentTypeToFactoryLookup = new Dictionary<ComponentType, IArtifactComponentFactory>
83+
{
84+
{
85+
ComponentType.Linux,
86+
componentFactories.FirstOrDefault(f => f is LinuxComponentFactory)
87+
},
88+
{ ComponentType.Npm, componentFactories.FirstOrDefault(f => f is NpmComponentFactory) },
89+
{ ComponentType.Pip, componentFactories.FirstOrDefault(f => f is PipComponentFactory) },
90+
};
7291
}
7392

7493
/// <inheritdoc/>
75-
public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string imageHash, IEnumerable<DockerLayer> containerLayers, int baseImageLayerCount, CancellationToken cancellationToken = default)
94+
public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(
95+
string imageHash,
96+
IEnumerable<DockerLayer> containerLayers,
97+
int baseImageLayerCount,
98+
ISet<ComponentType> enabledComponentTypes,
99+
CancellationToken cancellationToken = default
100+
)
76101
{
77102
using var record = new LinuxScannerTelemetryRecord
78103
{
@@ -93,8 +118,14 @@ public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string
93118
{
94119
try
95120
{
96-
var command = new List<string> { imageHash }.Concat(CmdParameters).ToList();
97-
(stdout, stderr) = await this.dockerService.CreateAndRunContainerAsync(ScannerImage, command, cancellationToken);
121+
var command = new List<string> { imageHash }
122+
.Concat(CmdParameters)
123+
.ToList();
124+
(stdout, stderr) = await this.dockerService.CreateAndRunContainerAsync(
125+
ScannerImage,
126+
command,
127+
cancellationToken
128+
);
98129
}
99130
catch (Exception e)
100131
{
@@ -106,7 +137,10 @@ public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string
106137
else
107138
{
108139
record.SemaphoreFailure = true;
109-
this.logger.LogWarning("Failed to enter the container semaphore for image {ImageHash}", imageHash);
140+
this.logger.LogWarning(
141+
"Failed to enter the container semaphore for image {ImageHash}",
142+
imageHash
143+
);
110144
}
111145
}
112146
finally
@@ -123,14 +157,13 @@ public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string
123157
if (string.IsNullOrWhiteSpace(stdout) || !string.IsNullOrWhiteSpace(stderr))
124158
{
125159
throw new InvalidOperationException(
126-
$"Scan failed with exit info: {stdout}{System.Environment.NewLine}{stderr}");
160+
$"Scan failed with exit info: {stdout}{System.Environment.NewLine}{stderr}"
161+
);
127162
}
128163

129164
var layerDictionary = containerLayers
130165
.DistinctBy(layer => layer.DiffId)
131-
.ToDictionary(
132-
layer => layer.DiffId,
133-
_ => new List<TypedComponent>());
166+
.ToDictionary(layer => layer.DiffId, _ => new List<TypedComponent>());
134167

135168
try
136169
{
@@ -143,10 +176,25 @@ public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string
143176
validArtifacts = filter.Filter(validArtifacts, syftOutput.Distro);
144177
}
145178

146-
// Create components using factories
179+
// Build a set of enabled factories based on requested component types
180+
var enabledFactories = new HashSet<IArtifactComponentFactory>();
181+
foreach (var componentType in enabledComponentTypes)
182+
{
183+
if (
184+
this.componentTypeToFactoryLookup.TryGetValue(componentType, out var factory)
185+
&& factory != null
186+
)
187+
{
188+
enabledFactories.Add(factory);
189+
}
190+
}
191+
192+
// Create components using only enabled factories
147193
var componentsWithLayers = validArtifacts
148194
.DistinctBy(artifact => (artifact.Name, artifact.Version, artifact.Type))
149-
.Select(artifact => this.CreateComponentWithLayers(artifact, syftOutput.Distro))
195+
.Select(artifact =>
196+
this.CreateComponentWithLayers(artifact, syftOutput.Distro, enabledFactories)
197+
)
150198
.Where(result => result.Component != null)
151199
.ToList();
152200

@@ -159,7 +207,10 @@ public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string
159207

160208
if (unsupportedTypes.Count > 0)
161209
{
162-
this.logger.LogDebug("Encountered unsupported artifact types: {UnsupportedTypes}", string.Join(", ", unsupportedTypes));
210+
this.logger.LogDebug(
211+
"Encountered unsupported artifact types: {UnsupportedTypes}",
212+
string.Join(", ", unsupportedTypes)
213+
);
163214
}
164215

165216
// Map components to layers
@@ -179,7 +230,9 @@ public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string
179230
});
180231

181232
// Track detected components in telemetry
182-
syftTelemetryRecord.Components = JsonConvert.SerializeObject(componentsWithLayers.Select(c => c.Component.Id));
233+
syftTelemetryRecord.Components = JsonConvert.SerializeObject(
234+
componentsWithLayers.Select(c => c.Component.Id)
235+
);
183236

184237
return layerMappedLinuxComponents;
185238
}
@@ -190,13 +243,23 @@ public async Task<IEnumerable<LayerMappedLinuxComponents>> ScanLinuxAsync(string
190243
}
191244
}
192245

193-
private (TypedComponent Component, IEnumerable<string> LayerIds) CreateComponentWithLayers(ArtifactElement artifact, Distro distro)
246+
private (TypedComponent Component, IEnumerable<string> LayerIds) CreateComponentWithLayers(
247+
ArtifactElement artifact,
248+
Distro distro,
249+
HashSet<IArtifactComponentFactory> enabledFactories
250+
)
194251
{
195252
if (!this.factoryLookup.TryGetValue(artifact.Type, out var factory))
196253
{
197254
return (null, []);
198255
}
199256

257+
// Skip this artifact if its factory is not in the enabled set
258+
if (!enabledFactories.Contains(factory))
259+
{
260+
return (null, []);
261+
}
262+
200263
var component = factory.CreateComponent(artifact, distro);
201264
if (component == null)
202265
{
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
namespace Microsoft.ComponentDetection.Orchestrator.Experiments.Configs;
2+
3+
using Microsoft.ComponentDetection.Contracts;
4+
using Microsoft.ComponentDetection.Detectors.Linux;
5+
using Microsoft.ComponentDetection.Detectors.Npm;
6+
using Microsoft.ComponentDetection.Detectors.Pip;
7+
8+
/// <summary>
/// Experiment configuration that validates <see cref="LinuxApplicationLayerDetector"/>, which reports
/// application-level packages in addition to system packages from Linux containers.
/// The control group is the standard file-based npm and pip detectors plus the Linux system package detector.
/// The experiment group is the container-based detector that handles all package types together.
/// </summary>
public class LinuxApplicationLayerExperiment : IExperimentConfiguration
{
    /// <summary>
    /// Gets the name of this experiment.
    /// </summary>
    public string Name
    {
        get { return "LinuxApplicationLayer"; }
    }

    /// <summary>
    /// Determines whether a detector belongs to the control group.
    /// </summary>
    /// <param name="componentDetector">The detector to classify.</param>
    /// <returns>
    /// <c>true</c> for a <see cref="LinuxContainerDetector"/> that is not a
    /// <see cref="LinuxApplicationLayerDetector"/>, or for any of the npm / pip detectors listed below;
    /// otherwise <c>false</c>.
    /// </returns>
    public bool IsInControlGroup(IComponentDetector componentDetector)
    {
        // The experimental detector derives from LinuxContainerDetector, so it has to be excluded explicitly.
        var isBaseLinuxDetector =
            componentDetector is LinuxContainerDetector
            && componentDetector is not LinuxApplicationLayerDetector;

        return isBaseLinuxDetector
            || componentDetector is NpmComponentDetector
            || componentDetector is NpmLockfileDetectorBase
            || componentDetector is PipReportComponentDetector;
    }

    /// <summary>
    /// Determines whether a detector belongs to the experiment group.
    /// </summary>
    /// <param name="componentDetector">The detector to classify.</param>
    /// <returns><c>true</c> only for a <see cref="LinuxApplicationLayerDetector"/>.</returns>
    public bool IsInExperimentGroup(IComponentDetector componentDetector)
    {
        return componentDetector is LinuxApplicationLayerDetector;
    }

    /// <summary>
    /// Decides whether results should be recorded for this experiment.
    /// </summary>
    /// <param name="componentDetector">The detector that produced the results (not inspected).</param>
    /// <param name="numComponents">The number of components found (not inspected).</param>
    /// <returns>Always <c>true</c>.</returns>
    public bool ShouldRecord(IComponentDetector componentDetector, int numComponents)
    {
        return true;
    }
}

src/Microsoft.ComponentDetection.Orchestrator/Extensions/ServiceCollectionExtensions.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
7272
services.AddSingleton<IExperimentProcessor, DefaultExperimentProcessor>();
7373
services.AddSingleton<IExperimentConfiguration, SimplePipExperiment>();
7474
services.AddSingleton<IExperimentConfiguration, UvLockDetectorExperiment>();
75+
services.AddSingleton<IExperimentConfiguration, LinuxApplicationLayerExperiment>();
7576

7677
// Detectors
7778
// CocoaPods
@@ -106,6 +107,7 @@ public static IServiceCollection AddComponentDetection(this IServiceCollection s
106107
services.AddSingleton<IArtifactComponentFactory, PipComponentFactory>();
107108
services.AddSingleton<IArtifactFilter, Mariner2ArtifactFilter>();
108109
services.AddSingleton<IComponentDetector, LinuxContainerDetector>();
110+
services.AddSingleton<IComponentDetector, LinuxApplicationLayerDetector>();
109111

110112
// Maven
111113
services.AddSingleton<IMavenCommandService, MavenCommandService>();

0 commit comments

Comments
 (0)