feat: enhanced Magnit scraper with streaming mode and retry logic
- Add streaming mode for memory-efficient large catalog scraping
- Implement retry logic with exponential backoff
- Add auto session reinitialization on 403 errors
- Add configurable options (pageSize, maxProducts, rateLimitDelay)
- Add maxIterations protection against infinite loops
- Add retry.ts utility module with withRetry and withRetryAndReinit
- Update .env.example with new scraping options
- Add pgAdmin and CloudBeaver to docker-compose

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
135
src/utils/retry.ts
Normal file
135
src/utils/retry.ts
Normal file
@@ -0,0 +1,135 @@
|
||||
import { Logger } from './logger.js';
|
||||
|
||||
/**
 * Tuning knobs for {@link withRetry}.
 *
 * `withRetry` accepts a `Partial<RetryOptions>`; the defaults quoted on each
 * field are the values it falls back to when that field is omitted.
 */
export interface RetryOptions {
  /** Total number of attempts, counting the very first call. Default: 3. */
  maxAttempts: number;

  /** Delay before the first retry, in milliseconds. Default: 1000. */
  initialDelay: number;

  /** Upper bound for the computed backoff delay, in milliseconds. Default: 30000. */
  maxDelay: number;

  /** Factor the delay grows by after every failed attempt. Default: 2 (exponential). */
  backoffMultiplier: number;

  /**
   * Values of `error.code` that are considered transient.
   * Default: ECONNRESET, ETIMEDOUT, ENOTFOUND, ECONNREFUSED, ENETUNREACH, EAI_AGAIN.
   */
  retryableErrors?: string[];

  /**
   * Custom retryability predicate. When supplied to `withRetry` it is used
   * instead of the built-in check based on `retryableErrors` and HTTP status.
   */
  shouldRetry?: (error: any) => boolean;

  /**
   * Hook invoked after a failed, retryable attempt and before the backoff sleep.
   * Receives the error, the 1-based number of the attempt that failed and the
   * delay (ms) chosen for the upcoming sleep. May be synchronous or async.
   */
  onRetry?: (error: any, attempt: number, delay: number) => void | Promise<void>;
}
|
||||
|
||||
/**
 * `error.code` values treated as transient when the caller does not pass its
 * own `retryableErrors` list.
 */
const DEFAULT_RETRYABLE_ERRORS: string[] = [
  'ECONNRESET', 'ETIMEDOUT',
  'ENOTFOUND', 'ECONNREFUSED',
  'ENETUNREACH', 'EAI_AGAIN',
];
|
||||
|
||||
/**
 * Runs `operation` and retries it with exponential backoff while it keeps
 * failing with a retryable error.
 *
 * The delay before retry number `k` (1-based) is
 * `min(initialDelay * backoffMultiplier^(k - 1), maxDelay)` milliseconds.
 *
 * @param operation Async action to execute; it is re-invoked from scratch on every attempt.
 * @param options Optional overrides; omitted fields fall back to 3 attempts, 1000 ms
 *   initial delay, 30000 ms maximum delay, multiplier 2 and the default error-code list.
 * @returns The value produced by the first successful attempt.
 * @throws The error of the failing attempt when it is not retryable or when the
 *   attempts are exhausted; also anything thrown (or rejected) by `onRetry`.
 */
export async function withRetry<T>(
  operation: () => Promise<T>,
  options: Partial<RetryOptions> = {}
): Promise<T> {
  const {
    maxAttempts = 3,
    initialDelay = 1000,
    maxDelay = 30000,
    backoffMultiplier = 2,
    retryableErrors = DEFAULT_RETRYABLE_ERRORS,
    shouldRetry,
    onRetry
  } = options;

  // Always run the operation at least once. With maxAttempts <= 0 (or NaN) the
  // loop would otherwise be skipped entirely and the call would reject with `undefined`.
  const totalAttempts = maxAttempts >= 1 ? maxAttempts : 1;

  let lastError: any;

  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    try {
      return await operation();
    } catch (error: any) {
      lastError = error;

      // Decide whether this failure is worth another attempt.
      const isRetryable = shouldRetry
        ? shouldRetry(error)
        : isErrorRetryable(error, retryableErrors);

      if (!isRetryable || attempt === totalAttempts) {
        throw error;
      }

      // Exponential backoff, capped at maxDelay.
      const delay = Math.min(
        initialDelay * Math.pow(backoffMultiplier, attempt - 1),
        maxDelay
      );

      // `error` may be null/undefined or a non-Error value, so read the message defensively.
      Logger.warn(
        `Попытка ${attempt}/${totalAttempts} не удалась: ${error?.message ?? String(error)}. ` +
          `Повтор через ${delay}ms...`
      );

      if (onRetry) {
        // The hook may be async (e.g. a session re-initialization). Await it so that
        // it completes before the next attempt and its failure reaches the caller
        // instead of becoming an unhandled rejection.
        await onRetry(error, attempt, delay);
      }

      await sleep(delay);
    }
  }

  // Only reachable when maxAttempts is fractional and the loop ends without a final rethrow.
  throw lastError;
}
|
||||
|
||||
/**
 * Built-in retryability check.
 *
 * An error is considered transient when its `code` is listed in
 * `retryableErrors`, or when `error.response.status` is 429 or in the 5xx range.
 *
 * @param error Whatever was thrown; may be any value, including `null`/`undefined`.
 * @param retryableErrors Error codes that qualify for a retry.
 * @returns `true` when the failure is worth retrying.
 */
function isErrorRetryable(error: any, retryableErrors: string[]): boolean {
  // A bare `Promise.reject()` or `throw null` lands here too; property access
  // on it would throw a TypeError and hide the original failure.
  if (error == null) {
    return false;
  }

  // Network-level failures are identified by their `code`.
  if (error.code && retryableErrors.includes(error.code)) {
    return true;
  }

  // HTTP 429 (Too Many Requests) and 5xx (server errors).
  const status = error.response?.status;
  return status === 429 || (status >= 500 && status < 600);
}
|
||||
|
||||
/**
 * Promise-based wrapper around `setTimeout`.
 *
 * @param ms Timeout passed to `setTimeout`, in milliseconds.
 * @returns A promise that resolves (with no value) once the timer fires.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
|
||||
|
||||
// Retry helper variant with automatic session re-initialization.
/**
 * Options for `withRetryAndReinit`: everything from `RetryOptions` plus the
 * settings that control how HTTP 403 responses are handled.
 */
export interface RetryWithReinitOptions extends RetryOptions {
  /**
   * Callback that re-establishes the session. It is awaited from the retry
   * hook when the failed attempt carried `response.status === 403`.
   */
  onReinit?: () => Promise<void>;

  /** Whether a 403 response is treated as retryable. Default: true. */
  reinitOn403?: boolean;
}
|
||||
|
||||
/**
 * `withRetry` variant that additionally treats HTTP 403 as retryable and runs a
 * session re-initialization callback before the retry.
 *
 * Retry decision for a failed attempt, in order:
 *  1. status 403 and `reinitOn403` enabled -> retry;
 *  2. caller-supplied `shouldRetry`, if any -> its verdict;
 *  3. otherwise the built-in check (`retryableErrors` codes, HTTP 429 / 5xx).
 *
 * @param operation Async action to execute.
 * @param options Retry options plus `reinitOn403` (default `true`) and `onReinit`.
 * @returns The value produced by the first successful attempt.
 * @throws Whatever `withRetry` throws for the wrapped operation.
 */
export async function withRetryAndReinit<T>(
  operation: () => Promise<T>,
  options: Partial<RetryWithReinitOptions> = {}
): Promise<T> {
  const { reinitOn403 = true, onReinit, ...retryOptions } = options;
  // Captured separately because both hooks are replaced by wrappers below;
  // without this the caller's `shouldRetry` would be silently discarded.
  const { shouldRetry: callerShouldRetry, onRetry: callerOnRetry } = retryOptions;

  // Thrown values can be null/undefined, hence the defensive access.
  const isForbidden = (error: any): boolean => error?.response?.status === 403;

  return withRetry(operation, {
    ...retryOptions,
    shouldRetry: (error: any) => {
      // 403 Forbidden - the session needs to be re-initialized.
      if (reinitOn403 && isForbidden(error)) {
        return true;
      }

      // Everything else: honour the caller's predicate, else the built-in rules.
      if (callerShouldRetry) {
        return callerShouldRetry(error);
      }
      return isErrorRetryable(error, retryOptions.retryableErrors ?? DEFAULT_RETRYABLE_ERRORS);
    },
    onRetry: async (error: any, attempt: number, delay: number) => {
      // Re-initialize only when the 403 handling is enabled and a callback exists.
      if (reinitOn403 && onReinit && isForbidden(error)) {
        Logger.warn('Получен 403 Forbidden. Переинициализация сессии...');
        await onReinit();
        Logger.info('✅ Сессия переинициализирована');
      }

      // Forward to the caller's hook; awaited so an async hook is part of this
      // callback's promise rather than a detached one.
      if (callerOnRetry) {
        await callerOnRetry(error, attempt, delay);
      }
    }
  });
}
|
||||
Reference in New Issue
Block a user