Skip to content

Commit f069f8f

Browse files
authored
Refactor hash DB formats and protect against "full game backup" (#3868)
* Refactor GOG client to include build details, manifest handling, and hash indexing; enhance GOG depot and file stream operations. Add new GOG models, exceptions, and utilities for manifest and hash management. * Add GOG language support, manifest models, and depot enhancements; implement default language filter, refine manifest-handling logic, and update hash generation processes. * Integrate file hash queries for Steam and GOG; enhance `FileHashesService` with locator ID resolution, add database query dependencies, and implement SQL macros for depot and build handling. * Enforce 2GB maximum backup size in `ALoadoutSynchronizer` and add validation for total archived file size. * Fix steam indexing and db generation * Add unimplemented `GetLocatorIdsForGame` method in `StubbedFileHasherService` * Update GOG schema with additional attributes for depots and manifests; adjust statistics and fingerprint * Adjust `TestsFor_0004_RemoveGameFiles` to handle database-specific conditions for `LocatorIds`.
1 parent a939620 commit f069f8f

File tree

20 files changed

+558
-180
lines changed

20 files changed

+558
-180
lines changed

src/NexusMods.Abstractions.GOG/DTOs/DepotInfo.cs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,13 @@ public class DepotItem
3939
[JsonPropertyName("path")]
4040
public required RelativePath Path { get; init; }
4141

42+
43+
/// <summary>
44+
/// The MD5 hash of the file.
45+
/// </summary>
46+
[JsonPropertyName("md5")]
47+
public Md5Value? Md5 { get; init; }
48+
4249
/// <summary>
4350
/// The chunks in the file.
4451
/// </summary>

src/NexusMods.Abstractions.GOG/IClient.cs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,14 +36,20 @@ public interface IClient
3636
/// </summary>
3737
public Task<Build[]> GetBuilds(ProductId productId, OS os, CancellationToken token);
3838

39+
40+
/// <summary>
41+
/// Gets the details for a build; these can include multiple depots, some of which may come from other product IDs.
42+
/// </summary>
43+
public Task<BuildDetails> GetBuildDetails(Build build, CancellationToken token);
44+
3945
/// <summary>
4046
/// Get the depot information for a depot listed in a build's details.
4147
/// </summary>
42-
public Task<DepotInfo> GetDepot(Build build, CancellationToken token);
48+
public Task<DepotInfo> GetDepot(BuildDetailsDepot depot, CancellationToken token);
4349

4450
/// <summary>
4551
/// Given a product ID, a depot, and a path, return a stream to the file. This file is seekable, and will cache and
4652
/// stream in data as required from the CDN.
4753
/// </summary>
48-
public Task<Stream> GetFileStream(Build build, DepotInfo depotInfo, RelativePath path, CancellationToken token);
54+
public Task<Stream> GetFileStream(ProductId productId, DepotInfo depotInfo, RelativePath path, CancellationToken token);
4955
}

src/NexusMods.Abstractions.Games.FileHashes/IFileHashesService.cs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ public interface IFileHashesService
5858
/// Suggest version data for a given game installation and files.
5959
/// </summary>
6060
public Optional<VersionData> SuggestVersionData(GameInstallation gameInstallation, IEnumerable<(GamePath Path, Hash Hash)> files);
61+
62+
/// <summary>
/// Gets the locator IDs for the given game installation.
/// </summary>
/// <param name="loadoutInstallationInstance">The game installation to resolve locator IDs for.</param>
/// <returns>The locator IDs found for the installation.</returns>
LocatorId[] GetLocatorIdsForGame(GameInstallation loadoutInstallationInstance);
6163
}
6264

6365
/// <summary>

src/NexusMods.Abstractions.Games.FileHashes/Models/GogBuild.cs

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,18 @@ public partial class GogBuild : IModelDefinition
1515
/// <summary>
1616
/// The GOG build ID.
1717
/// </summary>
18-
public static readonly BuildIdAttribute BuildId = new(Namespace, nameof(BuildId)) { IsIndexed = true };
18+
public static readonly BuildIdAttribute BuildId = new(Namespace, nameof(BuildId)) { IsIndexed = true, IsOptional = true };
1919

2020
/// <summary>
2121
/// The GOG product ID.
2222
/// </summary>
2323
public static readonly ProductIdAttribute ProductId = new(Namespace, nameof(ProductId)) { IsIndexed = true };
2424

25+
/// <summary>
26+
/// The unique manifest ID for this build.
27+
/// </summary>
28+
public static readonly StringAttribute ManifestId = new(Namespace, nameof(ManifestId)) { IsIndexed = true };
29+
2530
/// <summary>
2631
/// The Operating System the build is for.
2732
/// </summary>
@@ -43,7 +48,12 @@ public partial class GogBuild : IModelDefinition
4348
public static readonly BooleanAttribute Public = new(Namespace, nameof(Public));
4449

4550
/// <summary>
46-
/// The files in the GOG build.
51+
/// (Deprecated) The files in the GOG build; use Depots instead.
4752
/// </summary>
4853
public static readonly ReferencesAttribute<PathHashRelation> Files = new(Namespace, nameof(Files));
54+
55+
/// <summary>
56+
/// All the depots in this build
57+
/// </summary>
58+
public static readonly ReferencesAttribute<GogDepot> Depots = new(Namespace, nameof(Depots));
4959
}
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
using NexusMods.Abstractions.Games.FileHashes.Attributes.Gog;
2+
using NexusMods.Abstractions.GOG.Values;
3+
using NexusMods.MnemonicDB.Abstractions.Attributes;
4+
using NexusMods.MnemonicDB.Abstractions.Models;
5+
6+
namespace NexusMods.Abstractions.Games.FileHashes.Models;
7+
8+
/// <summary>
/// Model definition for a GOG depot: its product ID, sizes, manifest reference and languages.
/// </summary>
public partial class GogDepot : IModelDefinition
9+
{
10+
private const string Namespace = "NexusMods.Abstractions.Games.FileHashes.GogDepot";
11+
12+
/// <summary>
/// The GOG product ID of this depot (indexed).
/// </summary>
public static readonly ProductIdAttribute ProductId = new(Namespace, nameof(ProductId)) { IsIndexed = true };
13+
14+
/// <summary>
/// The size of the depot. NOTE(review): presumably the uncompressed size, confirm against the indexer.
/// </summary>
public static readonly SizeAttribute Size = new(Namespace, nameof(Size));
15+
16+
/// <summary>
/// The compressed size of the depot.
/// </summary>
public static readonly SizeAttribute CompressedSize = new(Namespace, nameof(CompressedSize));
17+
18+
/// <summary>
19+
/// The manifest pointed to by this depot
20+
/// </summary>
21+
public static readonly ReferenceAttribute<GogManifest> Manifest = new(Namespace, nameof(Manifest));
22+
23+
/// <summary>
24+
/// The languages in this depot.
25+
/// </summary>
26+
public static readonly StringsAttribute Languages = new(Namespace, nameof(Languages)) { IsIndexed = false };
27+
}
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
using NexusMods.MnemonicDB.Abstractions.Attributes;
2+
using NexusMods.MnemonicDB.Abstractions.Models;
3+
4+
namespace NexusMods.Abstractions.Games.FileHashes.Models;
5+
6+
/// <summary>
/// Model definition for a GOG manifest: an indexed manifest ID plus references to the files it contains.
/// </summary>
public partial class GogManifest : IModelDefinition
7+
{
8+
private const string Namespace = "NexusMods.Abstractions.Games.FileHashes.GogManifest";
9+
10+
/// <summary>
11+
/// The (unique) primary key of the manifest.
12+
/// </summary>
13+
public static readonly StringAttribute ManifestId = new(Namespace, nameof(ManifestId)) { IsIndexed = true };
14+
15+
/// <summary>
16+
/// The files in the manifest
17+
/// </summary>
18+
public static readonly ReferencesAttribute<PathHashRelation> Files = new(Namespace, nameof(Files));
19+
}

src/NexusMods.Abstractions.Loadouts.Synchronizers/ALoadoutSynchronizer.cs

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
using NexusMods.Sdk;
3232
using NexusMods.Sdk.FileStore;
3333
using NexusMods.Sdk.IO;
34+
using ReactiveUI;
3435
using OneOf;
3536
using Reloaded.Memory.Extensions;
3637

@@ -45,6 +46,12 @@ namespace NexusMods.Abstractions.Loadouts.Synchronizers;
4546
[PublicAPI]
4647
public class ALoadoutSynchronizer : ILoadoutSynchronizer
4748
{
49+
/// <summary>
50+
/// We limit backups to 2GB; for now we should never see much more than this
51+
/// of modified game files.
52+
/// </summary>
53+
private static Size MaximumBackupSize => Size.GB * 2;
54+
4855
private readonly ScopedAsyncLock _lock = new();
4956
private readonly IFileStore _fileStore;
5057

@@ -571,6 +578,10 @@ public void ProcessSyncTree(Dictionary<GamePath, SyncNode> tree)
571578

572579
loadout = await ReprocessOverrides(loadout);
573580

581+
job?.SetStatus("Archive Cleanup");
582+
await _garbageCollectorRunner.RunAsync();
583+
584+
574585
return loadout;
575586
}
576587

@@ -1314,6 +1325,10 @@ await Parallel.ForEachAsync(files, async (item, _) =>
13141325
}
13151326
);
13161327

1328+
var totalSize = archivedFiles.Sum(static x => x.Size);
1329+
if (totalSize > MaximumBackupSize)
1330+
throw new Exception($"Cannot backup files, total size is {totalSize}, which is larger than the maximum of {MaximumBackupSize}");
1331+
13171332
// PERFORMANCE: We deduplicate above with the HaveFile call.
13181333
await _fileStore.BackupFiles(archivedFiles, deduplicate: false);
13191334

src/NexusMods.DataModel/Services.cs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ public static IServiceCollection AddDataModel(this IServiceCollection coll)
7878
// Game Registry
7979
coll.AddSingleton<IGameRegistry, GameRegistry.GameRegistry>();
8080
coll.AddHostedService(s => (GameRegistry.GameRegistry)s.GetRequiredService<IGameRegistry>());
81-
coll.AddAttributeCollection(typeof(GameInstallMetadata));
81+
coll.AddGameInstallMetadataModel();
8282

8383
// File Store
8484
coll.AddAllSingleton<IFileStore, NxFileStore>();
Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
-- namespace: NexusMods.Games.FileHashes
2+
3+
CREATE SCHEMA IF NOT EXISTS file_hashes;
4+
5+
-- Find all the gog builds that match the given game's files, and rank them by the number of files that match
6+
CREATE MACRO file_hashes.resolve_gog_build(GameMetadataId, DefaultLanguage := 'en-US') AS TABLE
7+
SELECT build.BuildId, ANY_VALUE(build.ProductId) AS BuildProductId, COUNT(*) matching_files, ANY_VALUE(build."version"), list_distinct(LIST(depot.ProductId)) ProductIds
8+
FROM MDB_DISKSTATEENTRY() entry
9+
LEFT JOIN HASHES_HASHRELATION() hashrel on entry.Hash = hashRel.xxHash3
10+
LEFT JOIN HASHES_PATHHASHRELATION() pathrel on pathrel.Path = entry.Path.Item3 AND pathrel.Hash = hashrel.Id
11+
LEFT JOIN (SELECT Id, unnest(Files) FileId FROM HASHES_GOGMANIFEST()) manifest on pathRel.Id = manifest.FileId
12+
LEFT JOIN HASHES_GOGDEPOT() depot on depot.Manifest = Manifest.Id
13+
LEFT JOIN (SELECT Id, unnest(depots) depot, ProductId, buildId, "version" FROM HASHES_GOGBUILD()) build on depot.Id = build.Depot
14+
WHERE entry.Game = GameMetadataId
15+
AND DefaultLanguage in depot.Languages
16+
GROUP BY build.BuildId
17+
ORDER BY COUNT(*) DESC;
18+
19+
-- Find all the steam manifests that match the given game's files, and rank them by the number of files that match
20+
CREATE MACRO file_hashes.resolve_steam_manifests(GameMetadataId) AS TABLE
21+
SELECT ANY_VALUE(steam.depotId) DepotId, COUNT(*) matching_count, ANY_VALUE(steam.AppId) AppId, ANY_VALUE(steam.ManifestId)
22+
FROM MDB_DISKSTATEENTRY() entry
23+
LEFT JOIN HASHES_HASHRELATION() hashrel on entry.Hash = hashRel.xxHash3
24+
LEFT JOIN HASHES_PATHHASHRELATION() pathrel on pathrel.Path = entry.Path.Item3 AND pathrel.Hash = hashrel.Id
25+
LEFT JOIN (SELECT AppId, ManifestId, DepotId, unnest(Files) File FROM HASHES_STEAMMANIFEST()) steam on steam.File = pathrel.Id
26+
WHERE entry.Game = GameMetadataId
27+
GROUP BY steam.ManifestId
28+
ORDER BY COUNT(*) DESC;
29+
30+
-- Find the locator IDs for a given game: for every AppId found in the game folder, pick the manifest with the most matching files (returned in a column named DepotId)
31+
CREATE MACRO file_hashes.resolve_steam_depots(GameMetadataId) AS TABLE
32+
SELECT arg_max(ManifestId, matching_count) DepotId
33+
FROM file_hashes.resolve_steam_manifests(GameMetadataId) manifests
34+
GROUP BY manifests.AppId
35+
Having DepotId is not null;

src/NexusMods.Games.FileHashes/FileHashesService.cs

Lines changed: 81 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,29 @@
88
using NexusMods.Abstractions.GameLocators;
99
using NexusMods.Abstractions.Games.FileHashes;
1010
using NexusMods.Abstractions.Games.FileHashes.Models;
11+
using NexusMods.Abstractions.GOG.Values;
1112
using NexusMods.Abstractions.Jobs;
1213
using NexusMods.Abstractions.NexusWebApi.Types.V2;
1314
using NexusMods.Abstractions.Settings;
1415
using NexusMods.Abstractions.Steam.Values;
1516
using NexusMods.Games.FileHashes.DTOs;
1617
using NexusMods.Hashing.xxHash3;
17-
using NexusMods.MnemonicDB;
18+
using NexusMods.HyperDuck;
1819
using NexusMods.MnemonicDB.Abstractions;
1920
using NexusMods.MnemonicDB.Storage;
2021
using NexusMods.MnemonicDB.Storage.RocksDbBackend;
2122
using NexusMods.Paths;
2223
using NexusMods.Sdk;
2324
using NexusMods.Sdk.IO;
2425
using BuildId = NexusMods.Abstractions.GOG.Values.BuildId;
26+
using Connection = NexusMods.MnemonicDB.Connection;
2527

2628
namespace NexusMods.Games.FileHashes;
2729

2830
internal sealed class FileHashesService : IFileHashesService, IDisposable
2931
{
32+
private const string DefaultLanguage = "en-US";
33+
3034
private readonly ScopedAsyncLock _lock = new();
3135
private readonly FileHashesServiceSettings _settings;
3236
private readonly IFileSystem _fileSystem;
@@ -42,6 +46,8 @@ internal sealed class FileHashesService : IFileHashesService, IDisposable
4246
private ConnectedDb? _currentDb;
4347

4448
private readonly ILogger<FileHashesService> _logger;
49+
private readonly IQueryEngine _queryEngine;
50+
private IQueryMixin _queryMixin;
4551

4652
private record ConnectedDb(IDb Db, DatomStore Store, Backend Backend, DatabaseInfo DatabaseInfo);
4753

@@ -54,6 +60,8 @@ public FileHashesService(ILogger<FileHashesService> logger, ISettingsManager set
5460
_settings = settingsManager.Get<FileHashesServiceSettings>();
5561
_databases = new Dictionary<AbsolutePath, ConnectedDb>();
5662
_provider = provider;
63+
_queryEngine = provider.GetRequiredService<IQueryEngine>();
64+
_queryMixin = _queryEngine.DuckDb;
5765

5866
_hashDatabaseLocation = _settings.HashDatabaseLocation.ToPath(_fileSystem);
5967
_hashDatabaseLocation.CreateDirectory();
@@ -76,7 +84,7 @@ private ConnectedDb OpenDb(DatabaseInfo databaseInfo)
7684
};
7785

7886
var store = new DatomStore(_provider.GetRequiredService<ILogger<DatomStore>>(), settings, backend);
79-
var connection = new Connection(_provider.GetRequiredService<ILogger<Connection>>(), store, _provider, [], readOnlyMode: true);
87+
var connection = new Connection(_provider.GetRequiredService<ILogger<Connection>>(), store, _provider, [], readOnlyMode: true, prefix: "hashes", queryEngine: _queryEngine);
8088
var connectedDb = new ConnectedDb(connection.Db, store, backend, databaseInfo);
8189

8290
_databases[databaseInfo.Path] = connectedDb;
@@ -333,17 +341,48 @@ public IEnumerable<GameFileRecord> GetGameFiles(LocatorIdsWithGameStore locatorI
333341

334342
if (gameStore == GameStore.GOG)
335343
{
344+
HashSet<GogBuild.ReadOnly> gogBuilds = [];
345+
HashSet<ProductId> gogProducts = [];
346+
Dictionary<EntityId, GogManifest.ReadOnly> gogManifests = [];
347+
348+
// So first we find all the valid build Ids, and then assume that everything else is a product Id
336349
foreach (var id in locatorIds)
337350
{
338351
if (!ulong.TryParse(id.Value, out var parsedId))
339352
continue;
340353

341354
var gogId = BuildId.From(parsedId);
342355

343-
if (!GogBuild.FindByBuildId(Current, gogId).TryGetFirst(out var firstBuild))
356+
if (GogBuild.FindByBuildId(Current, gogId).TryGetFirst(out var firstBuild))
357+
{
358+
gogBuilds.Add(firstBuild);
344359
continue;
360+
}
361+
362+
var productId = ProductId.From(parsedId);
363+
gogProducts.Add(productId);
364+
}
365+
366+
// Now we emit all the files from the build products, and then also from any secondary products
367+
foreach (var build in gogBuilds)
368+
{
369+
foreach (var depot in build.Depots)
370+
{
371+
// We only care about the productId of the build, and the productIds of the secondary products
372+
if (!(depot.ProductId == build.ProductId || gogProducts.Contains(depot.ProductId)))
373+
continue;
374+
375+
// If there is a language setting for the files, they have to be the same as the default language
376+
if (!(depot.Languages.Count == 0 || depot.Languages.Contains(DefaultLanguage)))
377+
continue;
378+
379+
gogManifests[depot.Manifest.Id] = depot.Manifest;
380+
}
381+
}
345382

346-
foreach (var file in firstBuild.Files)
383+
foreach (var (_ , manifest) in gogManifests)
384+
{
385+
foreach (var file in manifest.Files)
347386
{
348387
yield return new GameFileRecord
349388
{
@@ -354,6 +393,7 @@ public IEnumerable<GameFileRecord> GetGameFiles(LocatorIdsWithGameStore locatorI
354393
};
355394
}
356395
}
396+
357397
}
358398
else if (gameStore == GameStore.Steam)
359399
{
@@ -560,7 +600,7 @@ public LocatorId[] GetLocatorIdsForVersionDefinition(GameStore gameStore, Versio
560600
{
561601
if (gameStore == GameStore.GOG)
562602
{
563-
return versionDefinition.GogBuilds.Select(build => LocatorId.From(build.BuildId.ToString())).ToArray();
603+
return versionDefinition.GogBuilds.Select(build => LocatorId.From(build.BuildId!.Value.ToString())).ToArray();
564604
}
565605

566606
if (gameStore == GameStore.Steam)
@@ -598,6 +638,42 @@ public Optional<VersionData> SuggestVersionData(GameInstallation gameInstallatio
598638
.FirstOrOptional(_ => true);
599639
}
600640

641+
/// <summary>
/// Resolves the locator IDs for a game installation by querying the hash database
/// with the installation's game metadata ID.
/// </summary>
/// <param name="gameInstallation">The installation whose store and game metadata ID drive the lookup.</param>
/// <returns>
/// Steam: one locator ID per row returned by <c>file_hashes.resolve_steam_depots</c>.
/// GOG: the build ID of the first row returned by <c>file_hashes.resolve_gog_build</c> (the macro orders rows by
/// matching file count, descending), followed by every product ID in that row that differs from the build's own
/// product ID. An empty array when no GOG build row is returned.
/// </returns>
/// <exception cref="NotSupportedException">Thrown when the store is neither Steam nor GOG.</exception>
public LocatorId[] GetLocatorIdsForGame(GameInstallation gameInstallation)
{
    if (gameInstallation.Store == GameStore.Steam)
    {
        // The query text must be an interpolated string: without the `$` prefix the literal
        // text "{gameInstallation.GameMetadataId}" is sent to the engine instead of the ID.
        // NOTE(review): assumes Query accepts an interpolated string and binds the value as a parameter; confirm against HyperDuck.
        // NOTE(review): the macro selects arg_max(ManifestId, ...) aliased as DepotId; confirm DepotId is wide enough for a manifest ID.
        return _queryMixin.Query<DepotId>($"SELECT * FROM file_hashes.resolve_steam_depots({gameInstallation.GameMetadataId});")
            .Select(id => LocatorId.From(id.Value.ToString()))
            .ToArray();
    }

    if (gameInstallation.Store == GameStore.GOG)
    {
        // Only the first (best ranked) build row is used.
        if (!_queryMixin.Query<(BuildId, ProductId, List<ProductId>)>($"SELECT BuildId, BuildProductId, ProductIds FROM file_hashes.resolve_gog_build({gameInstallation.GameMetadataId})")
                .TryGetFirst(out var found))
        {
            return [];
        }

        var (buildId, buildProductId, depotProductIds) = found;

        // The build ID goes first, then every product ID that is not the build's own product.
        var ids = new List<LocatorId> { LocatorId.From(buildId.Value.ToString()) };
        foreach (var productId in depotProductIds)
        {
            if (productId == buildProductId)
                continue;

            ids.Add(LocatorId.From(productId.Value.ToString()));
        }

        return ids.ToArray();
    }

    throw new NotSupportedException("No way to get locator IDs for: " + gameInstallation.Store);
}
676+
601677
/// <inheritdoc/>
602678
public void Dispose()
603679
{

0 commit comments

Comments
 (0)