Add retry logic for transient login failures

Adds a configurable retry mechanism with basic exponential backoff to handle intermittent failures when authenticating to container registries, particularly GCP (GAR/GCR), where I'm seeing intermittent errors.

- Add retry-attempts input (default: 0 for backward compatibility, making retries opt-in)
- Add retry-delay input (default: 5000ms)
- Implement exponential backoff retry logic in docker login
  - Chose to just write a simple retry function vs. going with a library
- Retry all errors except 5xxs
  - I'm seeing intermittent 401 failures
- Add tests for retry behavior
- Update README with new input parameters

Signed-off-by: Naush Korai <naush.korai@mixpanel.com>
This commit is contained in:
Naush Korai 2026-01-30 13:32:24 -05:00
parent 3227f5311c
commit 47690b2d19
7 changed files with 201 additions and 32 deletions

View file

@ -2,6 +2,7 @@ import {expect, jest, test} from '@jest/globals';
import * as path from 'path';
import {loginStandard, logout} from '../src/docker';
import {RetryArgs} from '../src/context';
import {Docker} from '@docker/actions-toolkit/lib/docker/docker';
@ -62,3 +63,103 @@ test('logout calls exec', async () => {
ignoreReturnCode: true
});
});
// Checks that loginStandard keeps invoking `docker login` after failed attempts
// and resolves once an attempt finally succeeds.
test('loginStandard retries on failure', async () => {
  // Fake timers let the delays between attempts elapse without real waiting.
  jest.useFakeTimers();
  try {
    let attemptCount = 0;
    // eslint-disable-next-line @typescript-eslint/ban-ts-comment
    // @ts-ignore
    const execSpy = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => {
      attemptCount++;
      // The first two invocations report a failure; the third one succeeds.
      if (attemptCount < 3) {
        return {
          exitCode: 1,
          stdout: '',
          stderr: 'Error: timeout exceeded'
        };
      }
      return {
        exitCode: 0,
        stdout: 'Login Succeeded',
        stderr: ''
      };
    });

    const username = 'dbowie';
    const password = 'groundcontrol';
    const registry = 'https://ghcr.io';
    const retryArgs: RetryArgs = {attempts: 3, delayMs: 100};

    // Start the login first, then flush the pending timers so the waits
    // between attempts complete before the promise is awaited.
    const loginPromise = loginStandard(registry, username, password, undefined, retryArgs);
    await jest.runAllTimersAsync();
    await loginPromise;

    expect(execSpy).toHaveBeenCalledTimes(3);
    expect(attemptCount).toBe(3);
  } finally {
    // Restore real timers even when an assertion above fails, so fake timers
    // never leak into the tests that run after this one.
    jest.useRealTimers();
  }
});
// With `attempts` set to 0, a failing login must reject after exactly one
// exec invocation.
test('loginStandard does not retry when attempts is 0', async () => {
  const noRetry: RetryArgs = {attempts: 0, delayMs: 100};

  // Every invocation of the mocked exec reports the same failure.
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  const dockerExec = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => ({
    exitCode: 1,
    stdout: '',
    stderr: 'Error: timeout exceeded'
  }));

  const pendingLogin = loginStandard('https://ghcr.io', 'dbowie', 'groundcontrol', undefined, noRetry);

  await expect(pendingLogin).rejects.toThrow('timeout exceeded');
  expect(dockerExec).toHaveBeenCalledTimes(1);
});
// When every attempt fails, the login rejects once the retry budget is spent:
// one initial invocation plus `attempts` retries.
test('loginStandard fails after max retries', async () => {
  const twoRetries: RetryArgs = {attempts: 2, delayMs: 10};

  // The mocked exec never succeeds.
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  const dockerExec = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => ({
    exitCode: 1,
    stdout: '',
    stderr: 'Error: timeout exceeded'
  }));

  const pendingLogin = loginStandard('https://ghcr.io', 'dbowie', 'groundcontrol', undefined, twoRetries);

  await expect(pendingLogin).rejects.toThrow('timeout exceeded');
  expect(dockerExec).toHaveBeenCalledTimes(3);
});
// A failure whose stderr reports a 500 error must reject after a single exec
// invocation, even though retries are enabled.
test('loginStandard does not retry on 5xx errors', async () => {
  const threeRetries: RetryArgs = {attempts: 3, delayMs: 100};

  // The mocked exec always reports a server-side error.
  // eslint-disable-next-line @typescript-eslint/ban-ts-comment
  // @ts-ignore
  const dockerExec = jest.spyOn(Docker, 'getExecOutput').mockImplementation(async () => ({
    exitCode: 1,
    stdout: '',
    stderr: 'Error: 500 Internal Server Error'
  }));

  const pendingLogin = loginStandard('https://ghcr.io', 'dbowie', 'groundcontrol', undefined, threeRetries);

  await expect(pendingLogin).rejects.toThrow('500 Internal Server Error');
  expect(dockerExec).toHaveBeenCalledTimes(1);
});