From 129ad336af1fbb121d0fd85c3175ba59622e859f Mon Sep 17 00:00:00 2001
From: Dave Marion
Date: Mon, 29 Jun 2026 13:21:01 +0000
Subject: [PATCH] Attempt at reducing GC memory usage when gathering candidates

This change pushes the Value filtering to the server-side using
an iterator instead of using a filter in a Stream, and sets an
initial size on the candidate ArrayList to try and reduce resize
operations.
---
 .../server/metadata/GcCandidateFilter.java    | 33 +++++++++++++++++++
 .../server/metadata/ServerAmpleImpl.java      |  5 +--
 .../java/org/apache/accumulo/gc/GCRun.java    |  2 +-
 3 files changed, 37 insertions(+), 3 deletions(-)
 create mode 100644 server/base/src/main/java/org/apache/accumulo/server/metadata/GcCandidateFilter.java

diff --git a/server/base/src/main/java/org/apache/accumulo/server/metadata/GcCandidateFilter.java b/server/base/src/main/java/org/apache/accumulo/server/metadata/GcCandidateFilter.java
new file mode 100644
index 00000000000..146734395f8
--- /dev/null
+++ b/server/base/src/main/java/org/apache/accumulo/server/metadata/GcCandidateFilter.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.accumulo.server.metadata;
+
+import org.apache.accumulo.core.data.Key;
+import org.apache.accumulo.core.data.Value;
+import org.apache.accumulo.core.iterators.Filter;
+import org.apache.accumulo.core.metadata.schema.MetadataSchema.DeletesSection.SkewedKeyValue;
+
+public class GcCandidateFilter extends Filter {
+
+  @Override
+  public boolean accept(Key k, Value v) {
+    return v.equals(SkewedKeyValue.NAME);
+  }
+
+}
diff --git a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
index 22cb7a941fb..b47a4103968 100644
--- a/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
+++ b/server/base/src/main/java/org/apache/accumulo/server/metadata/ServerAmpleImpl.java
@@ -36,6 +36,7 @@
 import org.apache.accumulo.core.client.BatchScanner;
 import org.apache.accumulo.core.client.BatchWriter;
 import org.apache.accumulo.core.client.IsolatedScanner;
+import org.apache.accumulo.core.client.IteratorSetting;
 import org.apache.accumulo.core.client.MutationsRejectedException;
 import org.apache.accumulo.core.client.Scanner;
 import org.apache.accumulo.core.client.TableNotFoundException;
@@ -60,7 +61,6 @@
 import org.apache.accumulo.core.metadata.schema.ExternalCompactionId;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.BlipSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.DeletesSection;
-import org.apache.accumulo.core.metadata.schema.MetadataSchema.DeletesSection.SkewedKeyValue;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.ExternalCompactionSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.ScanServerFileReferenceSection;
 import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection.BulkFileColumnFamily;
@@ -270,7 +270,8 @@ public Iterator<GcCandidate> getGcCandidates(DataLevel level) {
         throw new RuntimeException(e);
       }
       scanner.setRange(range);
-      return scanner.stream().filter(entry -> entry.getValue().equals(SkewedKeyValue.NAME))
+      scanner.addScanIterator(new IteratorSetting(25, "gcCandidate", GcCandidateFilter.class));
+      return scanner.stream()
           .map(
               entry -> new GcCandidate(DeletesSection.decodeRow(entry.getKey().getRow().toString()),
                   entry.getKey().getTimestamp()))
diff --git a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
index 9c8b1da8b5b..2a2261a8dd9 100644
--- a/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
+++ b/server/gc/src/main/java/org/apache/accumulo/gc/GCRun.java
@@ -150,7 +150,7 @@ public List<GcCandidate> readCandidatesThatFitInMemory(Iterator<GcCandidate> can
     // Converting the bytes to approximate number of characters for batch size.
     long candidateBatchSize = getCandidateBatchSize() / 2;
 
-    List<GcCandidate> candidatesBatch = new ArrayList<>();
+    List<GcCandidate> candidatesBatch = new ArrayList<>(256);
     batchCount.incrementAndGet();
 
     while (candidates.hasNext()) {