spring-boot spring-webflux project-reactor resilience4j

Resilience4j and Reactor retryWhen not working together to distribute 100 requests at 10 requests per second


I'm calling a remote service and don't want to exceed 10 RPS, so I configured a Resilience4j RateLimiter and added retryWhen to handle RequestNotPermitted errors and retry once a call is allowed again.

The problem I'm having is that it does not retry multiple times, so I never get results beyond id 10.

I created this simplified sample code:

  /**
   * Reproduction of the problem: pushes the ids 1..100 through a rate limiter
   * configured for 10 permits per 1-second refresh period and retries each
   * rejected call with {@link #retrySpec(Integer)}, logging every response.
   *
   * @param args command-line arguments (unused)
   */
  public static void main(final String[] args) {

    // 10 permits per 1-second period; a timeout of 0 ms presumably means a call
    // that finds no permit is rejected right away instead of waiting — confirm
    // against the RateLimiterConfig documentation.
    final RateLimiterConfig config = RateLimiterConfig.custom()
        .limitForPeriod(10)
        .limitRefreshPeriod(Duration.ofSeconds(1))
        .timeoutDuration(Duration.ofMillis(0))
        .build();

    final RateLimiterRegistry rateLimiterRegistry = RateLimiterRegistry.of(config);
    final RateLimiter rateLimiter = rateLimiterRegistry.rateLimiter("exampleService");

    final Flux<Integer> ids = Flux.range(1, 100); // Example Flux of IDs

    // Per id: defer the call, apply the rate limiter, then retry according to
    // retrySpec(id); responses are logged as they are emitted downstream.
    ids.flatMapSequential(id -> Mono.defer(() -> invokeRemoteService(id)
        .transformDeferred(RateLimiterOperator.of(rateLimiter))
        .retryWhen(retrySpec(id))))
        .doOnNext(response -> log.info("Response: {}", response))
        // NOTE(review): nothing after subscribe() makes main wait for the
        // pipeline to finish. If the 1-second retry delay is scheduled on a
        // background (daemon) thread, the JVM may exit before any retry fires,
        // which would match output that stops after the first 10 ids — confirm.
        .subscribe();
  }

  /**
   * Stand-in for the remote call: emits the textual form of the given id.
   *
   * @param integer the id to "send" to the remote service
   * @return a Mono emitting the id rendered as a String
   */
  private static Mono<String> invokeRemoteService(final Integer integer) {
    final String payload = String.valueOf(integer);
    return Mono.just(payload);
  }

  /**
   * Builds the retry policy for one id: up to 10 retries with a fixed
   * 1-second delay, applied only to {@code RequestNotPermitted} errors.
   * Every filter decision is logged together with the id.
   *
   * @param id the id the policy is created for (used for logging only)
   * @return the fixed-delay retry spec restricted to rate-limiter rejections
   */
  private static RetryBackoffSpec retrySpec(final Integer id) {
    final RetryBackoffSpec everySecond = Retry.fixedDelay(10, Duration.ofSeconds(1));
    return everySecond.filter(error -> {
      final boolean rateLimited = error instanceof RequestNotPermitted;
      log.info("Retrying {}? {}", id, rateLimited);
      return rateLimited;
    });
  }

And this is the output:

11:07:04.100 [main] INFO com.sample.Resilience -- Response: 1
11:07:04.105 [main] INFO com.sample.Resilience -- Response: 2
11:07:04.106 [main] INFO com.sample.Resilience -- Response: 3
11:07:04.107 [main] INFO com.sample.Resilience -- Response: 4
11:07:04.107 [main] INFO com.sample.Resilience -- Response: 5
11:07:04.107 [main] INFO com.sample.Resilience -- Response: 6
11:07:04.108 [main] INFO com.sample.Resilience -- Response: 7
11:07:04.108 [main] INFO com.sample.Resilience -- Response: 8
11:07:04.109 [main] INFO com.sample.Resilience -- Response: 9
11:07:04.109 [main] INFO com.sample.Resilience -- Response: 10
11:07:04.117 [main] INFO com.sample.Resilience -- Retrying 11? true
11:07:04.153 [main] INFO com.sample.Resilience -- Retrying 12? true
11:07:04.154 [main] INFO com.sample.Resilience -- Retrying 13? true
11:07:04.155 [main] INFO com.sample.Resilience -- Retrying 14? true
11:07:04.156 [main] INFO com.sample.Resilience -- Retrying 15? true
11:07:04.157 [main] INFO com.sample.Resilience -- Retrying 16? true
11:07:04.159 [main] INFO com.sample.Resilience -- Retrying 17? true
11:07:04.160 [main] INFO com.sample.Resilience -- Retrying 18? true
11:07:04.161 [main] INFO com.sample.Resilience -- Retrying 19? true
11:07:04.162 [main] INFO com.sample.Resilience -- Retrying 20? true
11:07:04.163 [main] INFO com.sample.Resilience -- Retrying 21? true
11:07:04.164 [main] INFO com.sample.Resilience -- Retrying 22? true
11:07:04.165 [main] INFO com.sample.Resilience -- Retrying 23? true
11:07:04.166 [main] INFO com.sample.Resilience -- Retrying 24? true
11:07:04.167 [main] INFO com.sample.Resilience -- Retrying 25? true
11:07:04.169 [main] INFO com.sample.Resilience -- Retrying 26? true
11:07:04.170 [main] INFO com.sample.Resilience -- Retrying 27? true
11:07:04.171 [main] INFO com.sample.Resilience -- Retrying 28? true
11:07:04.171 [main] INFO com.sample.Resilience -- Retrying 29? true
11:07:04.171 [main] INFO com.sample.Resilience -- Retrying 30? true
11:07:04.172 [main] INFO com.sample.Resilience -- Retrying 31? true
11:07:04.173 [main] INFO com.sample.Resilience -- Retrying 32? true
11:07:04.173 [main] INFO com.sample.Resilience -- Retrying 33? true
11:07:04.174 [main] INFO com.sample.Resilience -- Retrying 34? true
11:07:04.175 [main] INFO com.sample.Resilience -- Retrying 35? true
11:07:04.175 [main] INFO com.sample.Resilience -- Retrying 36? true
11:07:04.176 [main] INFO com.sample.Resilience -- Retrying 37? true
11:07:04.176 [main] INFO com.sample.Resilience -- Retrying 38? true
11:07:04.177 [main] INFO com.sample.Resilience -- Retrying 39? true
11:07:04.178 [main] INFO com.sample.Resilience -- Retrying 40? true
11:07:04.179 [main] INFO com.sample.Resilience -- Retrying 41? true
11:07:04.181 [main] INFO com.sample.Resilience -- Retrying 42? true
11:07:04.181 [main] INFO com.sample.Resilience -- Retrying 43? true
11:07:04.182 [main] INFO com.sample.Resilience -- Retrying 44? true
11:07:04.182 [main] INFO com.sample.Resilience -- Retrying 45? true
11:07:04.183 [main] INFO com.sample.Resilience -- Retrying 46? true
11:07:04.184 [main] INFO com.sample.Resilience -- Retrying 47? true
11:07:04.184 [main] INFO com.sample.Resilience -- Retrying 48? true
11:07:04.185 [main] INFO com.sample.Resilience -- Retrying 49? true
11:07:04.186 [main] INFO com.sample.Resilience -- Retrying 50? true
11:07:04.186 [main] INFO com.sample.Resilience -- Retrying 51? true
11:07:04.187 [main] INFO com.sample.Resilience -- Retrying 52? true
11:07:04.187 [main] INFO com.sample.Resilience -- Retrying 53? true
11:07:04.188 [main] INFO com.sample.Resilience -- Retrying 54? true
11:07:04.189 [main] INFO com.sample.Resilience -- Retrying 55? true
11:07:04.189 [main] INFO com.sample.Resilience -- Retrying 56? true
11:07:04.190 [main] INFO com.sample.Resilience -- Retrying 57? true
11:07:04.191 [main] INFO com.sample.Resilience -- Retrying 58? true
11:07:04.192 [main] INFO com.sample.Resilience -- Retrying 59? true
11:07:04.192 [main] INFO com.sample.Resilience -- Retrying 60? true
11:07:04.193 [main] INFO com.sample.Resilience -- Retrying 61? true
11:07:04.193 [main] INFO com.sample.Resilience -- Retrying 62? true
11:07:04.194 [main] INFO com.sample.Resilience -- Retrying 63? true
11:07:04.194 [main] INFO com.sample.Resilience -- Retrying 64? true
11:07:04.195 [main] INFO com.sample.Resilience -- Retrying 65? true
11:07:04.196 [main] INFO com.sample.Resilience -- Retrying 66? true
11:07:04.196 [main] INFO com.sample.Resilience -- Retrying 67? true
11:07:04.197 [main] INFO com.sample.Resilience -- Retrying 68? true
11:07:04.197 [main] INFO com.sample.Resilience -- Retrying 69? true
11:07:04.198 [main] INFO com.sample.Resilience -- Retrying 70? true
11:07:04.198 [main] INFO com.sample.Resilience -- Retrying 71? true
11:07:04.199 [main] INFO com.sample.Resilience -- Retrying 72? true
11:07:04.199 [main] INFO com.sample.Resilience -- Retrying 73? true
11:07:04.200 [main] INFO com.sample.Resilience -- Retrying 74? true
11:07:04.201 [main] INFO com.sample.Resilience -- Retrying 75? true
11:07:04.202 [main] INFO com.sample.Resilience -- Retrying 76? true
11:07:04.203 [main] INFO com.sample.Resilience -- Retrying 77? true
11:07:04.204 [main] INFO com.sample.Resilience -- Retrying 78? true
11:07:04.204 [main] INFO com.sample.Resilience -- Retrying 79? true
11:07:04.205 [main] INFO com.sample.Resilience -- Retrying 80? true
11:07:04.206 [main] INFO com.sample.Resilience -- Retrying 81? true
11:07:04.207 [main] INFO com.sample.Resilience -- Retrying 82? true
11:07:04.207 [main] INFO com.sample.Resilience -- Retrying 83? true
11:07:04.208 [main] INFO com.sample.Resilience -- Retrying 84? true
11:07:04.208 [main] INFO com.sample.Resilience -- Retrying 85? true
11:07:04.209 [main] INFO com.sample.Resilience -- Retrying 86? true
11:07:04.209 [main] INFO com.sample.Resilience -- Retrying 87? true
11:07:04.210 [main] INFO com.sample.Resilience -- Retrying 88? true
11:07:04.212 [main] INFO com.sample.Resilience -- Retrying 89? true
11:07:04.213 [main] INFO com.sample.Resilience -- Retrying 90? true
11:07:04.214 [main] INFO com.sample.Resilience -- Retrying 91? true
11:07:04.215 [main] INFO com.sample.Resilience -- Retrying 92? true
11:07:04.215 [main] INFO com.sample.Resilience -- Retrying 93? true
11:07:04.216 [main] INFO com.sample.Resilience -- Retrying 94? true
11:07:04.217 [main] INFO com.sample.Resilience -- Retrying 95? true
11:07:04.218 [main] INFO com.sample.Resilience -- Retrying 96? true
11:07:04.219 [main] INFO com.sample.Resilience -- Retrying 97? true
11:07:04.220 [main] INFO com.sample.Resilience -- Retrying 98? true
11:07:04.220 [main] INFO com.sample.Resilience -- Retrying 99? true
11:07:04.221 [main] INFO com.sample.Resilience -- Retrying 100? true

Solution

  • Following @M.Deinum's advice I switched to Resilience4j's Retry and it fixed the problem:

      /**
       * Sends the ids 1..100 through a rate limiter (10 permits per 1-second
       * refresh period, zero timeout) combined with a Resilience4j retry
       * (at most 10 attempts, 1 second apart), logs each response, collects all
       * responses into a list, and blocks until the pipeline has terminated.
       *
       * @param args command-line arguments (unused)
       * @throws InterruptedException if the main thread is interrupted while
       *                              waiting for the pipeline to terminate
       */
      public static void main(final String[] args) throws InterruptedException {

        final RateLimiterConfig config = RateLimiterConfig.custom()
            .limitForPeriod(10)
            .limitRefreshPeriod(Duration.ofSeconds(1))
            .timeoutDuration(Duration.ofMillis(0))
            .build();
        final RateLimiter rateLimiter = RateLimiter.of("exampleService", config);

        // NOTE(review): retrying on every RuntimeException also retries genuine
        // failures of the remote call; consider narrowing this to the rate
        // limiter's rejection exception if only throttling should be retried.
        final RetryConfig retryConfig = RetryConfig.custom()
            .maxAttempts(10)
            .waitDuration(Duration.ofSeconds(1))
            .retryExceptions(RuntimeException.class)
            .build();
        final Retry retry = Retry.of("exampleService", retryConfig);

        // Released when the pipeline completes or fails, so main can wait for it.
        final CountDownLatch latch = new CountDownLatch(1);

        Flux.range(1, 100)
            .flatMapSequential(id -> rateLimitedInvocation(id, retry, rateLimiter)
                .doOnNext(response -> log.info("Response: {}", response))
                // Pass the throwable as the trailing argument (no placeholder for
                // it) so the logger records it as the exception with its stack
                // trace instead of leaving a literal "{}" in the message.
                .doOnError(throwable -> log.error("Error for id {}", id, throwable)))
            .collectList()
            .doOnNext(it -> log.info("Finished {}", it))
            .doOnTerminate(latch::countDown)
            .subscribe();

        // subscribe() returns immediately; wait here until the pipeline terminates.
        latch.await();

      }
    
      /**
       * Wraps the remote call for one id with the rate limiter and then with the
       * retry, in that order, so a call rejected by the limiter is re-attempted
       * according to the retry configuration.
       *
       * @param id          the id to invoke the remote service with
       * @param retry       the retry applied around the rate-limited call
       * @param rateLimiter the rate limiter guarding the remote call
       * @return a Mono emitting the service response for the id
       */
      private static Mono<String> rateLimitedInvocation(final Integer id, final Retry retry, final RateLimiter rateLimiter) {
        final Mono<String> call = Mono.defer(() -> invokeRemoteService(id));
        final Mono<String> throttled = call.transformDeferred(RateLimiterOperator.of(rateLimiter));
        return throttled.transformDeferred(RetryOperator.of(retry));
      }
    
      /**
       * Stand-in for the remote call: emits the textual form of the given id.
       *
       * @param integer the id to "send" to the remote service
       * @return a Mono emitting the id rendered as a String
       */
      private static Mono<String> invokeRemoteService(final Integer integer) {
        final String payload = String.valueOf(integer);
        return Mono.just(payload);
      }
    

    Result:

    12:13:07.163 [main] INFO com.sample.Resilience -- Response: 1
    12:13:07.166 [main] INFO com.sample.Resilience -- Response: 2
    12:13:07.166 [main] INFO com.sample.Resilience -- Response: 3
    12:13:07.167 [main] INFO com.sample.Resilience -- Response: 4
    12:13:07.167 [main] INFO com.sample.Resilience -- Response: 5
    12:13:07.167 [main] INFO com.sample.Resilience -- Response: 6
    12:13:07.167 [main] INFO com.sample.Resilience -- Response: 7
    12:13:07.167 [main] INFO com.sample.Resilience -- Response: 8
    12:13:07.167 [main] INFO com.sample.Resilience -- Response: 9
    12:13:07.168 [main] INFO com.sample.Resilience -- Response: 10
    12:13:08.186 [parallel-2] INFO com.sample.Resilience -- Response: 12
    12:13:08.186 [parallel-3] INFO com.sample.Resilience -- Response: 13
    12:13:08.186 [parallel-9] INFO com.sample.Resilience -- Response: 19
    12:13:08.186 [parallel-8] INFO com.sample.Resilience -- Response: 18
    12:13:08.186 [parallel-1] INFO com.sample.Resilience -- Response: 11
    12:13:08.186 [parallel-7] INFO com.sample.Resilience -- Response: 17
    12:13:08.186 [parallel-5] INFO com.sample.Resilience -- Response: 15
    12:13:08.186 [parallel-4] INFO com.sample.Resilience -- Response: 14
    12:13:08.186 [parallel-6] INFO com.sample.Resilience -- Response: 16
    12:13:08.186 [parallel-10] INFO com.sample.Resilience -- Response: 20
    12:13:09.188 [parallel-11] INFO com.sample.Resilience -- Response: 21
    12:13:09.188 [parallel-12] INFO com.sample.Resilience -- Response: 24
    12:13:09.188 [parallel-14] INFO com.sample.Resilience -- Response: 22
    12:13:09.188 [parallel-13] INFO com.sample.Resilience -- Response: 23
    12:13:09.188 [parallel-16] INFO com.sample.Resilience -- Response: 28
    12:13:09.188 [parallel-15] INFO com.sample.Resilience -- Response: 27
    12:13:09.189 [parallel-2] INFO com.sample.Resilience -- Response: 30
    12:13:09.189 [parallel-3] INFO com.sample.Resilience -- Response: 25
    12:13:09.189 [parallel-1] INFO com.sample.Resilience -- Response: 29
    12:13:09.189 [parallel-12] INFO com.sample.Resilience -- Response: 38
    12:13:10.190 [parallel-11] INFO com.sample.Resilience -- Response: 39
    12:13:10.191 [parallel-12] INFO com.sample.Resilience -- Response: 31
    12:13:10.190 [parallel-3] INFO com.sample.Resilience -- Response: 35
    12:13:10.190 [parallel-13] INFO com.sample.Resilience -- Response: 32
    12:13:10.190 [parallel-4] INFO com.sample.Resilience -- Response: 43
    12:13:10.190 [parallel-1] INFO com.sample.Resilience -- Response: 41
    12:13:10.190 [parallel-2] INFO com.sample.Resilience -- Response: 34
    12:13:10.191 [parallel-14] INFO com.sample.Resilience -- Response: 33
    12:13:10.190 [parallel-16] INFO com.sample.Resilience -- Response: 40
    12:13:10.191 [parallel-5] INFO com.sample.Resilience -- Response: 36
    12:13:11.191 [parallel-8] INFO com.sample.Resilience -- Response: 48
    12:13:11.191 [parallel-16] INFO com.sample.Resilience -- Response: 45
    12:13:11.191 [parallel-1] INFO com.sample.Resilience -- Response: 44
    12:13:11.191 [parallel-7] INFO com.sample.Resilience -- Response: 47
    12:13:11.191 [parallel-6] INFO com.sample.Resilience -- Response: 49
    12:13:11.191 [parallel-3] INFO com.sample.Resilience -- Response: 37
    12:13:11.191 [parallel-2] INFO com.sample.Resilience -- Response: 26
    12:13:11.191 [parallel-5] INFO com.sample.Resilience -- Response: 46
    12:13:11.191 [parallel-4] INFO com.sample.Resilience -- Response: 42
    12:13:11.191 [parallel-9] INFO com.sample.Resilience -- Response: 51
    12:13:12.193 [parallel-11] INFO com.sample.Resilience -- Response: 55
    12:13:12.193 [parallel-10] INFO com.sample.Resilience -- Response: 57
    12:13:12.193 [parallel-8] INFO com.sample.Resilience -- Response: 50
    12:13:12.193 [parallel-6] INFO com.sample.Resilience -- Response: 53
    12:13:12.193 [parallel-12] INFO com.sample.Resilience -- Response: 54
    12:13:12.193 [parallel-14] INFO com.sample.Resilience -- Response: 59
    12:13:12.193 [parallel-13] INFO com.sample.Resilience -- Response: 61
    12:13:12.194 [parallel-15] INFO com.sample.Resilience -- Response: 60
    12:13:12.193 [parallel-9] INFO com.sample.Resilience -- Response: 58
    12:13:12.193 [parallel-7] INFO com.sample.Resilience -- Response: 52
    12:13:13.195 [parallel-7] INFO com.sample.Resilience -- Response: 56
    12:13:13.195 [parallel-5] INFO com.sample.Resilience -- Response: 63
    12:13:13.195 [parallel-8] INFO com.sample.Resilience -- Response: 71
    12:13:13.195 [parallel-6] INFO com.sample.Resilience -- Response: 62
    12:13:13.195 [parallel-10] INFO com.sample.Resilience -- Response: 70
    12:13:13.195 [parallel-9] INFO com.sample.Resilience -- Response: 68
    12:13:13.195 [parallel-12] INFO com.sample.Resilience -- Response: 72
    12:13:13.195 [parallel-11] INFO com.sample.Resilience -- Response: 69
    12:13:13.195 [parallel-13] INFO com.sample.Resilience -- Response: 75
    12:13:13.195 [parallel-14] INFO com.sample.Resilience -- Response: 74
    12:13:14.196 [parallel-13] INFO com.sample.Resilience -- Response: 65
    12:13:14.196 [parallel-14] INFO com.sample.Resilience -- Response: 73
    12:13:14.196 [parallel-15] INFO com.sample.Resilience -- Response: 67
    12:13:14.196 [parallel-4] INFO com.sample.Resilience -- Response: 83
    12:13:14.196 [parallel-5] INFO com.sample.Resilience -- Response: 79
    12:13:14.196 [parallel-3] INFO com.sample.Resilience -- Response: 66
    12:13:14.196 [parallel-9] INFO com.sample.Resilience -- Response: 85
    12:13:14.196 [parallel-1] INFO com.sample.Resilience -- Response: 77
    12:13:14.196 [parallel-6] INFO com.sample.Resilience -- Response: 81
    12:13:14.196 [parallel-8] INFO com.sample.Resilience -- Response: 84
    12:13:15.198 [parallel-10] INFO com.sample.Resilience -- Response: 80
    12:13:15.198 [parallel-11] INFO com.sample.Resilience -- Response: 82
    12:13:15.198 [parallel-12] INFO com.sample.Resilience -- Response: 64
    12:13:15.198 [parallel-14] INFO com.sample.Resilience -- Response: 78
    12:13:15.198 [parallel-13] INFO com.sample.Resilience -- Response: 76
    12:13:15.198 [parallel-2] INFO com.sample.Resilience -- Response: 91
    12:13:15.198 [parallel-1] INFO com.sample.Resilience -- Response: 94
    12:13:15.198 [parallel-16] INFO com.sample.Resilience -- Response: 93
    12:13:15.198 [parallel-15] INFO com.sample.Resilience -- Response: 89
    12:13:15.199 [parallel-11] INFO com.sample.Resilience -- Response: 98
    12:13:16.200 [parallel-1] INFO com.sample.Resilience -- Response: 96
    12:13:16.200 [parallel-13] INFO com.sample.Resilience -- Response: 97
    12:13:16.200 [parallel-3] INFO com.sample.Resilience -- Response: 95
    12:13:16.200 [parallel-2] INFO com.sample.Resilience -- Response: 99
    12:13:16.200 [parallel-14] INFO com.sample.Resilience -- Response: 87
    12:13:16.200 [parallel-15] INFO com.sample.Resilience -- Response: 88
    12:13:16.200 [parallel-12] INFO com.sample.Resilience -- Response: 92
    12:13:16.200 [parallel-4] INFO com.sample.Resilience -- Response: 100
    12:13:16.200 [parallel-16] INFO com.sample.Resilience -- Response: 86
    12:13:16.201 [parallel-12] INFO com.sample.Resilience -- Response: 90
    12:13:16.201 [parallel-12] INFO com.sample.Resilience -- Finished [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100]