Skip to content
Open
7 changes: 4 additions & 3 deletions integrating.md
Original file line number Diff line number Diff line change
Expand Up @@ -245,11 +245,12 @@ Ensure `mongocrypt_setopt_retry_kms` is called on the `mongocrypt_t` to enable r
c. Write the message from `mongocrypt_kms_ctx_message` to the
> socket.

d. Feed the reply back with `mongocrypt_kms_ctx_feed`. Repeat
> until `mongocrypt_kms_ctx_bytes_needed` returns 0.
d. Feed the reply back with `mongocrypt_kms_ctx_feed` or `mongocrypt_kms_ctx_feed_with_retry`. Repeat
> until `mongocrypt_kms_ctx_bytes_needed` returns 0. If the `should_retry` outparam returns true,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
> until `mongocrypt_kms_ctx_bytes_needed` returns 0. If the `should_retry` outparam returns true,
until `mongocrypt_kms_ctx_bytes_needed` returns 0. If the `should_retry` outparam returns true,

the request may be retried by feeding the new response into the same context.

If any step encounters a network error, call `mongocrypt_kms_ctx_fail`.
If `mongocrypt_kms_ctx_fail` returns true, continue to the next KMS context.
If `mongocrypt_kms_ctx_fail` returns true, retry the request by continuing to the next KMS context or by feeding the new response into the same context.
If `mongocrypt_kms_ctx_fail` returns false, abort and report an error. Consider wrapping the error reported in `mongocrypt_kms_ctx_status` to include the last network error.

Copy link
Contributor

@kevinAlbs kevinAlbs Aug 21, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The note below might be confusing, since it mentions fanning out requests with the older retry behavior:

Note, the driver MAY fan out KMS requests in parallel. More KMS requests may be added when processing responses to retry.

Consider adding a section describing retry and iteration. Idea:

Retry and Iteration

Retry behavior is enabled by calling mongocrypt_setopt_retry_kms.

There are two options for retry:

  • Lazy retry: After processing KMS contexts, iterate again by calling mongocrypt_ctx_next_kms_ctx. KMS contexts needing a retry will be returned.
  • In-place retry: If a KMS context indicates retry, retry the KMS request and feed the response to the same KMS context. Use mongocrypt_kms_ctx_feed_with_retry and check the return of mongocrypt_kms_ctx_fail to determine whether a retry is indicated.

The driver MAY fan out KMS requests in parallel. It is not safe to iterate KMS contexts (i.e. call mongocrypt_ctx_next_kms_ctx) while operating on KMS contexts (e.g. calling mongocrypt_kms_ctx_feed). Drivers are recommended to do an in-place retry on KMS requests.

2. When done feeding all replies, call `mongocrypt_ctx_kms_done`.
Expand Down
56 changes: 34 additions & 22 deletions src/mongocrypt-kms-ctx.c
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/

#include "kms_message/kms_kmip_request.h"
#include "kms_message/kms_response_parser.h"
#include "mongocrypt-binary-private.h"
#include "mongocrypt-buffer-private.h"
#include "mongocrypt-crypto-private.h"
Expand Down Expand Up @@ -518,6 +519,9 @@ static void set_retry(mongocrypt_kms_ctx_t *kms) {
kms->should_retry = true;
kms->attempts++;
kms->sleep_usec = backoff_time_usec(kms->attempts);
if (kms->parser) {
kms_response_parser_reset(kms->parser);
}
}

/* An AWS KMS context has received full response. Parse out the result or error.
Expand Down Expand Up @@ -1120,6 +1124,24 @@ static bool _ctx_done_kmip_decrypt(mongocrypt_kms_ctx_t *kms_ctx) {
return ret;
}

/* Report whether a KMS request of type `req_type` is on the retry allow-list.
 *
 * Returns true when `req_type` equals one of the request types listed below,
 * and false for every other value. Only listed types are considered safe to
 * retry: some requests are non-idempotent and cannot be safely retried. */
static bool _is_retryable_req(_kms_request_type_t req_type) {
    const _kms_request_type_t allowed[] = {MONGOCRYPT_KMS_AZURE_OAUTH,
                                           MONGOCRYPT_KMS_GCP_OAUTH,
                                           MONGOCRYPT_KMS_AWS_ENCRYPT,
                                           MONGOCRYPT_KMS_AWS_DECRYPT,
                                           MONGOCRYPT_KMS_AZURE_WRAPKEY,
                                           MONGOCRYPT_KMS_AZURE_UNWRAPKEY,
                                           MONGOCRYPT_KMS_GCP_ENCRYPT,
                                           MONGOCRYPT_KMS_GCP_DECRYPT};
    const size_t num_allowed = sizeof(allowed) / sizeof(allowed[0]);

    // Linear scan; stop as soon as a match is seen.
    bool found = false;
    for (size_t idx = 0; idx < num_allowed && !found; idx++) {
        found = (allowed[idx] == req_type);
    }
    return found;
}

bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms) {
if (!kms) {
return false;
Expand All @@ -1138,37 +1160,23 @@ bool mongocrypt_kms_ctx_fail(mongocrypt_kms_ctx_t *kms) {
return false;
}

// Check if request type is retryable. Some requests are non-idempotent and cannot be safely retried.
_kms_request_type_t retryable_types[] = {MONGOCRYPT_KMS_AZURE_OAUTH,
MONGOCRYPT_KMS_GCP_OAUTH,
MONGOCRYPT_KMS_AWS_ENCRYPT,
MONGOCRYPT_KMS_AWS_DECRYPT,
MONGOCRYPT_KMS_AZURE_WRAPKEY,
MONGOCRYPT_KMS_AZURE_UNWRAPKEY,
MONGOCRYPT_KMS_GCP_ENCRYPT,
MONGOCRYPT_KMS_GCP_DECRYPT};
bool is_retryable = false;
for (size_t i = 0; i < sizeof(retryable_types) / sizeof(retryable_types[0]); i++) {
if (retryable_types[i] == kms->req_type) {
is_retryable = true;
break;
}
}
if (!is_retryable) {
if (!_is_retryable_req(kms->req_type)) {
CLIENT_ERR("KMS request failed due to network error");
return false;
}

// Mark KMS context as retryable. Return again in `mongocrypt_ctx_next_kms_ctx`.
set_retry(kms);

// Reset intermediate state of parser.
if (kms->parser) {
kms_response_parser_reset(kms->parser);
}
return true;
}

bool mongocrypt_kms_ctx_feed_with_retry(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes, bool *should_retry) {
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
bool mongocrypt_kms_ctx_feed_with_retry(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes, bool *should_retry) {
bool mongocrypt_kms_ctx_feed_with_retry(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes, bool *should_retry) {
BSON_ASSERT_PARAM(kms);
BSON_ASSERT_PARAM(bytes);
BSON_ASSERT_PARAM(should_retry);

Suggest asserting required args are non-NULL to abort early. I expect the abort would only occur due to a driver bug (not during normal execution).

*should_retry = false;
const bool res = mongocrypt_kms_ctx_feed(kms, bytes);
*should_retry = kms->should_retry && kms->retry_enabled;
return res;
}

bool mongocrypt_kms_ctx_feed(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes) {
if (!kms) {
return false;
Expand All @@ -1178,6 +1186,10 @@ bool mongocrypt_kms_ctx_feed(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *byt
if (!mongocrypt_status_ok(status)) {
return false;
}
if (kms->should_retry) {
// This happens when a KMS context is reused in-place
kms->should_retry = false;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

With mongocrypt_kms_ctx_feed_with_retry, I expect it would be a driver bug to call mongocrypt_kms_ctx_feed directly on a KMS context needing retry. Calling mongocrypt_kms_ctx_feed suggests the driver is not doing in-place retry.

Consider making this an error:

if (kms->should_retry) {
     CLIENT_ERR ("KMS context needs retry. Call mongocrypt_kms_ctx_feed_with_retry instead");
     return false;
}

And setting kms->should_retry to false in mongocrypt_kms_ctx_feed_with_retry.


if (!bytes) {
CLIENT_ERR("argument 'bytes' is required");
Expand Down
20 changes: 19 additions & 1 deletion src/mongocrypt.h
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,25 @@ MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_feed(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes);

/**
* Indicate a network-level failure.
* Feed bytes from the HTTP response.
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
* Feed bytes from the HTTP response.
* Feed bytes from the KMS response.

Notably: KMIP is not HTTP. Suggest also updating the comment in mongocrypt_kms_ctx_feed to match.

*
* Feeding more bytes than what has been returned in @ref
* mongocrypt_kms_ctx_bytes_needed is an error.
*
* @param[in] kms The @ref mongocrypt_kms_ctx_t.
* @param[in] bytes The bytes to feed. The viewed data is copied. It is valid to
* destroy @p bytes with @ref mongocrypt_binary_destroy immediately after.
* @param[out] should_retry Whether the KMS request should be retried. Retry in-place
* without calling @ref mongocrypt_kms_ctx_fail.
* @returns A boolean indicating success. If false, an error status is set.
* Retrieve it with @ref mongocrypt_kms_ctx_status
*/
MONGOCRYPT_EXPORT
bool mongocrypt_kms_ctx_feed_with_retry(mongocrypt_kms_ctx_t *kms, mongocrypt_binary_t *bytes, bool *should_retry);

/**
* Indicate a network error. Discards all data fed to this KMS context with @ref mongocrypt_kms_ctx_feed.
* The @ref mongocrypt_kms_ctx_t may be reused.
*
* @param[in] kms The @ref mongocrypt_kms_ctx_t.
* @return A boolean indicating whether the failed request may be retried.
Expand Down
7 changes: 7 additions & 0 deletions test/data/kms-aws/encrypt-response-partial.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
HTTP/1.1 200 OK
x-amzn-RequestId: deeb35e5-4ecb-4bf1-9af5-84a54ff0af0e
Content-Type: application/x-amz-json-1.1
Content-Length: 446
Connection: close

{"KeyId": "arn:aws:k
128 changes: 128 additions & 0 deletions test/test-mongocrypt-datakey.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,40 @@ static void _test_create_datakey_with_retry(_mongocrypt_tester_t *tester) {
mongocrypt_destroy(crypt);
}

// Test that an HTTP error is retried in-place.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
bool should_retry;
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Feed a retryable HTTP error.
ASSERT_OK(mongocrypt_kms_ctx_feed_with_retry(kms_ctx, TEST_FILE("./test/data/rmd/kms-decrypt-reply-429.txt"), &should_retry), kms_ctx);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Run ./etc/format-all.sh to fix the check-format Evergreen task.

// In-place retry is indicated.
ASSERT(should_retry);
// Feed another retryable HTTP error.
ASSERT_OK(mongocrypt_kms_ctx_feed_with_retry(kms_ctx, TEST_FILE("./test/data/rmd/kms-decrypt-reply-429.txt"), &should_retry), kms_ctx);
// Expect some sleep is requested
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), >=, 0);
// In-place retry is indicated.
ASSERT(should_retry);
ASSERT(kms_ctx->attempts == 2);
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}

// Test that a network error is retried.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
Expand Down Expand Up @@ -454,6 +488,100 @@ static void _test_create_datakey_with_retry(_mongocrypt_tester_t *tester) {
mongocrypt_destroy(crypt);
}

// Test that a network error is retried in-place.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Mark a network error.
ASSERT_OK(mongocrypt_kms_ctx_fail(kms_ctx), kms_ctx);
// Feed a partial response
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response-partial.txt")), kms_ctx);
// Mark another network error.
ASSERT_OK(mongocrypt_kms_ctx_fail(kms_ctx), kms_ctx);
// Expect some sleep is requested
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), >=, 0);
ASSERT(kms_ctx->attempts == 2);
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}
// Test that subsequent network and HTTP errors can be retried in-place
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
bool should_retry;
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Mark a network error.
ASSERT_OK(mongocrypt_kms_ctx_fail(kms_ctx), kms_ctx);
// Feed a retryable HTTP error.
ASSERT_OK(mongocrypt_kms_ctx_feed_with_retry(kms_ctx, TEST_FILE("./test/data/rmd/kms-decrypt-reply-429.txt"), &should_retry), kms_ctx);
// In-place retry is indicated.
ASSERT(should_retry);
// Expect some sleep is requested
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), >=, 0);
ASSERT(kms_ctx->attempts == 2);
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}

// Test that subsequent HTTP and network errors can be retried in-place
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
mongocrypt_ctx_t *ctx = mongocrypt_ctx_new(crypt);
bool should_retry;
ASSERT_OK(
mongocrypt_ctx_setopt_key_encryption_key(ctx,
TEST_BSON("{'provider': 'aws', 'key': 'foo', 'region': 'bar'}")),
ctx);
ASSERT_OK(mongocrypt_ctx_datakey_init(ctx), ctx);
ASSERT_STATE_EQUAL(mongocrypt_ctx_state(ctx), MONGOCRYPT_CTX_NEED_KMS);
mongocrypt_kms_ctx_t *kms_ctx = mongocrypt_ctx_next_kms_ctx(ctx);
ASSERT_OK(kms_ctx, ctx);
// Expect no sleep is requested before any error.
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), ==, 0);
// Feed a retryable HTTP error.
ASSERT_OK(mongocrypt_kms_ctx_feed_with_retry(kms_ctx, TEST_FILE("./test/data/rmd/kms-decrypt-reply-429.txt"), &should_retry), kms_ctx);
// In-place retry is indicated.
ASSERT(should_retry);
// Mark a network error.
ASSERT_OK(mongocrypt_kms_ctx_fail(kms_ctx), kms_ctx);
// Expect some sleep is requested
ASSERT_CMPINT64(mongocrypt_kms_ctx_usleep(kms_ctx), >=, 0);
ASSERT(kms_ctx->attempts == 2);
// Feed a successful response.
ASSERT_OK(mongocrypt_kms_ctx_feed(kms_ctx, TEST_FILE("./test/data/kms-aws/encrypt-response.txt")), kms_ctx);
ASSERT_OK(mongocrypt_ctx_kms_done(ctx), ctx);
_mongocrypt_tester_run_ctx_to(tester, ctx, MONGOCRYPT_CTX_DONE);
mongocrypt_ctx_destroy(ctx);
mongocrypt_destroy(crypt);
}

// Test that an oauth request is retried for a network error.
{
mongocrypt_t *crypt = _mongocrypt_tester_mongocrypt(TESTER_MONGOCRYPT_DEFAULT);
Expand Down