@@ -10,6 +10,7 @@ import (
1010 "strings"
1111 "time"
1212
13+ "github.com/alitto/pond/v2"
1314 "github.com/cenkalti/backoff/v4"
1415 "github.com/google/uuid"
1516 "github.com/hashicorp/go-multierror"
@@ -23,6 +24,8 @@ import (
2324 "google.golang.org/protobuf/types/known/timestamppb"
2425)
2526
27+ const defaultSharedWorkers = 5
28+
2629var kvRetriesCounter = promauto .NewCounterVec (
2730 prometheus.CounterOpts {
2831 Name : "graveler_kv_retries" ,
@@ -31,6 +34,17 @@ var kvRetriesCounter = promauto.NewCounterVec(
3134 []string {"operation" },
3235)
3336
37+ // GravelerConfig holds the dependencies NewGraveler uses to build a Graveler instance.
38+ type GravelerConfig struct {
39+ CommittedManager CommittedManager
40+ StagingManager StagingManager
41+ RefManager RefManager
42+ GarbageCollectionManager GarbageCollectionManager
43+ ProtectedBranchesManager ProtectedBranchesManager
44+ DeleteSensor * DeleteSensor
45+ WorkPool pond.Pool // optional: when nil, NewGraveler creates a pool of defaultSharedWorkers workers
46+ }
47+
3448//go:generate go run github.com/golang/mock/[email protected] -source=graveler.go -destination=mock/graveler.go -package=mock 3549
3650const (
@@ -198,27 +212,29 @@ func WithStageOnly(v bool) GetOptionsFunc {
198212 }
199213}
200214
201- type ConditionFunc func (currentValue * Value ) error
202- type SetOptions struct {
203- // MaxTries set number of times we try to perform the operation before we fail with BranchWriteMaxTries.
204- // By default, 0 - we try BranchWriteMaxTries
205- MaxTries int
206- // Force set to true will bypass repository read-only protection.
207- Force bool
208- // AllowEmpty set to true will allow committing an empty commit.
209- AllowEmpty bool
210- // Hidden Will create the branch with the hidden property
211- Hidden bool
212- // SquashMerge causes merge commits to be "squashed", losing parent
213- // information about the merged-from branch.
214- SquashMerge bool
215- // NoTombstone will try to remove entry without setting a tombstone in KV
216- NoTombstone bool
217- // Condition is a function that validates the current value before performing the Set.
218- // If the condition returns an error, the Set operation fails with that error.
219- // If the condition succeeds, the Set is performed using SetIf with the current value.
220- Condition ConditionFunc
221- }
215+ type (
216+ ConditionFunc func (currentValue * Value ) error
217+ SetOptions struct {
218+ // MaxTries sets the number of times we try to perform the operation before we fail with BranchWriteMaxTries.
219+ // The default, 0, means we try BranchWriteMaxTries times.
220+ MaxTries int
221+ // Force set to true will bypass repository read-only protection.
222+ Force bool
223+ // AllowEmpty set to true will allow committing an empty commit.
224+ AllowEmpty bool
225+ // Hidden set to true will create the branch with the hidden property.
226+ Hidden bool
227+ // SquashMerge causes merge commits to be "squashed", losing parent
228+ // information about the merged-from branch.
229+ SquashMerge bool
230+ // NoTombstone will try to remove the entry without setting a tombstone in KV.
231+ NoTombstone bool
232+ // Condition is a function that validates the current value before performing the Set.
233+ // If the condition returns an error, the Set operation fails with that error.
234+ // If the condition succeeds, the Set is performed using SetIf with the current value.
235+ Condition ConditionFunc
236+ }
237+ )
222238
223239type SetOptionsFunc func (opts * SetOptions )
224240
@@ -1205,22 +1221,30 @@ type Graveler struct {
12051221 logger logging.Logger
12061222 BranchUpdateBackOff backoff.BackOff
12071223 deleteSensor * DeleteSensor
1224+ workPool pond.Pool
12081225}
12091226
1210- func NewGraveler (committedManager CommittedManager , stagingManager StagingManager , refManager RefManager , gcManager GarbageCollectionManager , protectedBranchesManager ProtectedBranchesManager , deleteSensor * DeleteSensor ) * Graveler {
1227+ func NewGraveler (cfg GravelerConfig ) * Graveler {
12111228 branchUpdateBackOff := backoff .NewExponentialBackOff ()
12121229 branchUpdateBackOff .MaxInterval = BranchUpdateMaxInterval
12131230
1231+ workPool := cfg .WorkPool
1232+ if workPool == nil {
1233+ // No pool was supplied: fall back to a default work pool of defaultSharedWorkers workers
1234+ workPool = pond .NewPool (defaultSharedWorkers )
1235+ }
1236+
12141237 return & Graveler {
12151238 hooks : & HooksNoOp {},
1216- CommittedManager : committedManager ,
1217- RefManager : refManager ,
1218- StagingManager : stagingManager ,
1239+ CommittedManager : cfg . CommittedManager ,
1240+ RefManager : cfg . RefManager ,
1241+ StagingManager : cfg . StagingManager ,
12191242 BranchUpdateBackOff : branchUpdateBackOff ,
1220- protectedBranchesManager : protectedBranchesManager ,
1221- garbageCollectionManager : gcManager ,
1243+ protectedBranchesManager : cfg . ProtectedBranchesManager ,
1244+ garbageCollectionManager : cfg . GarbageCollectionManager ,
12221245 logger : logging .ContextUnavailable ().WithField ("service_name" , "graveler_graveler" ),
1223- deleteSensor : deleteSensor ,
1246+ deleteSensor : cfg .DeleteSensor ,
1247+ workPool : workPool ,
12241248 }
12251249}
12261250
@@ -1991,8 +2015,7 @@ func (g *Graveler) Delete(ctx context.Context, repository *RepositoryRecord, bra
19912015 return err
19922016}
19932017
1994- // DeleteBatch delete batch of keys. Keys length is limited to DeleteKeysMaxSize. Return error can be of type
1995- // 'multi-error' holds DeleteError with each key/error that failed as part of the batch.
2018+ // DeleteBatch deletes a batch of keys. The number of keys is limited to DeleteKeysMaxSize. Returns the first error encountered during deletion.
19962019func (g * Graveler ) DeleteBatch (ctx context.Context , repository * RepositoryRecord , branchID BranchID , keys []Key , opts ... SetOptionsFunc ) error {
19972020 isProtected , err := g .protectedBranchesManager .IsBlocked (ctx , repository , branchID , BranchProtectionBlockedAction_STAGING_WRITE )
19982021 if err != nil {
@@ -2011,20 +2034,24 @@ func (g *Graveler) DeleteBatch(ctx context.Context, repository *RepositoryRecord
20112034 return fmt .Errorf ("keys length (%d) passed the maximum allowed(%d): %w" , len (keys ), DeleteKeysMaxSize , ErrInvalidValue )
20122035 }
20132036
2014- var m * multierror.Error
20152037 log := g .log (ctx ).WithField ("operation" , "delete_keys" )
20162038 deleteFunc := g .deleteUnsafe
20172039 if options .NoTombstone {
20182040 deleteFunc = g .deleteNoTombstoneUnsafe
20192041 }
20202042 err = g .safeBranchWrite (ctx , log , repository , branchID , safeBranchWriteOptions {}, func (branch * Branch ) error {
2043+ // Use workpool for parallel deletion
2044+ workerGroup := g .workPool .NewGroupContext (ctx )
2045+
2046+ // Submit delete tasks to workpool
20212047 for _ , key := range keys {
2022- err = deleteFunc (ctx , repository , key , BranchRecord {branchID , branch })
2023- if err != nil {
2024- m = multierror .Append (m , & DeleteError {Key : key , Err : err })
2025- }
2048+ workerGroup .SubmitErr (func () error {
2049+ return deleteFunc (ctx , repository , key , BranchRecord {branchID , branch })
2050+ })
20262051 }
2027- return m .ErrorOrNil ()
2052+
2053+ // Wait for the first error or completion
2054+ return workerGroup .Wait ()
20282055 }, "delete_keys" )
20292056 return err
20302057}
0 commit comments