feat: enhanced Magnit scraper with streaming mode and retry logic

- Add streaming mode for memory-efficient large catalog scraping
- Implement retry logic with exponential backoff
- Add auto session reinitialization on 403 errors
- Add configurable options (pageSize, maxProducts, rateLimitDelay)
- Add maxIterations protection against infinite loops
- Add retry.ts utility module with withRetry and withRetryAndReinit
- Update .env.example with new scraping options
- Add pgAdmin and CloudBeaver to docker-compose

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-01-21 22:14:04 +05:00
parent 19c0426cdc
commit 9164527f58
5 changed files with 585 additions and 74 deletions

135
src/utils/retry.ts Normal file
View File

@@ -0,0 +1,135 @@
import { Logger } from './logger.js';
/**
 * Tuning knobs for {@link withRetry}.
 *
 * Callers normally pass a `Partial<RetryOptions>`; the "default" noted on each
 * field is the value `withRetry` substitutes when that field is omitted.
 */
export interface RetryOptions {
  /** Total number of attempts, counting the very first call. Default: 3. */
  maxAttempts: number;
  /** Delay in milliseconds before the first retry. Default: 1000. */
  initialDelay: number;
  /** Upper bound in milliseconds for any single backoff delay. Default: 30000. */
  maxDelay: number;
  /** Factor the delay is multiplied by after every failed attempt. Default: 2. */
  backoffMultiplier: number;
  /**
   * `error.code` values that are considered transient.
   * Default: ECONNRESET, ETIMEDOUT, ENOTFOUND, ECONNREFUSED, ENETUNREACH,
   * EAI_AGAIN. HTTP 5xx and 429 responses are retried by the built-in check
   * regardless of this list.
   */
  retryableErrors?: string[];
  /**
   * Custom retry predicate. When supplied to `withRetry` it replaces the
   * built-in classification (error codes / HTTP status) entirely.
   */
  shouldRetry?: (error: any) => boolean;
  /**
   * Hook invoked after a failed attempt that is going to be retried, before
   * the backoff sleep. Receives the error, the 1-based number of the attempt
   * that failed and the upcoming delay in milliseconds. May be asynchronous.
   */
  onRetry?: (error: any, attempt: number, delay: number) => void | Promise<void>;
}
/**
 * `error.code` values treated as transient network failures when the caller
 * does not provide its own `retryableErrors` list.
 */
const DEFAULT_RETRYABLE_ERRORS: string[] = [
  'ECONNRESET', // connection reset by peer
  'ETIMEDOUT', // operation timed out
  'ENOTFOUND', // DNS lookup failed
  'ECONNREFUSED', // connection refused
  'ENETUNREACH', // network unreachable
  'EAI_AGAIN' // temporary DNS resolution failure
];
/**
 * Runs `operation` and retries it with exponential backoff while it keeps
 * failing with a retryable error.
 *
 * The delay before retry number `k` (1-based) is
 * `min(initialDelay * backoffMultiplier^(k - 1), maxDelay)` milliseconds.
 *
 * @param operation Async action to execute; called once per attempt.
 * @param options   Overrides for the retry policy; omitted fields fall back to
 *                  3 attempts, 1000 ms initial delay, 30000 ms cap, multiplier 2
 *                  and the built-in list of retryable error codes.
 * @returns The value `operation` resolves with on its first successful attempt.
 * @throws The error of the last attempt when it is not retryable or the
 *         attempt budget is exhausted; also anything thrown or rejected by
 *         the `onRetry` hook.
 */
export async function withRetry<T>(
  operation: () => Promise<T>,
  options: Partial<RetryOptions> = {}
): Promise<T> {
  const {
    maxAttempts = 3,
    initialDelay = 1000,
    maxDelay = 30000,
    backoffMultiplier = 2,
    retryableErrors = DEFAULT_RETRYABLE_ERRORS,
    shouldRetry,
    onRetry
  } = options;

  // Always run the operation at least once: with maxAttempts < 1 (or NaN) the
  // loop would otherwise be skipped and `undefined` would be thrown.
  const attemptLimit = maxAttempts >= 1 ? maxAttempts : 1;

  let lastError: any;
  for (let attempt = 1; attempt <= attemptLimit; attempt++) {
    try {
      return await operation();
    } catch (error: any) {
      lastError = error;

      // Decide whether this failure is worth another attempt.
      const isRetryable = shouldRetry
        ? shouldRetry(error)
        : isErrorRetryable(error, retryableErrors);

      // Give up on non-retryable errors or when no further attempt is allowed.
      if (!isRetryable || attempt + 1 > attemptLimit) {
        throw error;
      }

      // Exponential backoff, capped at maxDelay.
      const delay = Math.min(
        initialDelay * Math.pow(backoffMultiplier, attempt - 1),
        maxDelay
      );

      // Rejections are not guaranteed to be Error objects (or even non-null).
      const reason = error?.message ?? String(error);
      Logger.warn(
        `Попытка ${attempt}/${attemptLimit} не удалась: ${reason}. ` +
        `Повтор через ${delay}ms...`
      );

      if (onRetry) {
        // Await the hook: it may be asynchronous (e.g. a session
        // re-initialization) and must finish before the next attempt starts.
        await onRetry(error, attempt, delay);
      }

      await sleep(delay);
    }
  }

  // Not reachable in practice (the loop either returns or throws); kept so
  // every code path ends in a return or throw for the type checker.
  throw lastError;
}
/**
 * Built-in classification of transient failures.
 *
 * @param error           Whatever the failed operation threw or rejected with;
 *                        may be any value, including `null`/`undefined`.
 * @param retryableErrors `error.code` values to treat as retryable.
 * @returns `true` when `error.code` is in `retryableErrors`, or when
 *          `error.response.status` is 429 or in the 500-599 range;
 *          `false` otherwise.
 */
function isErrorRetryable(error: any, retryableErrors: string[]): boolean {
  // A promise can reject with null/undefined; reading `.code` on it would
  // throw a TypeError and hide the original failure.
  if (error == null) {
    return false;
  }

  // Network-level errors identified by their code.
  if (error.code && retryableErrors.includes(error.code)) {
    return true;
  }

  const status = error.response?.status;

  // HTTP 5xx (server errors)
  if (status >= 500 && status < 600) {
    return true;
  }

  // HTTP 429 (Too Many Requests)
  return status === 429;
}
/**
 * Resolves after a timer of `ms` milliseconds fires.
 *
 * @param ms Timeout passed to `setTimeout`.
 */
function sleep(ms: number): Promise<void> {
  return new Promise<void>((done) => {
    setTimeout(done, ms);
  });
}
/**
 * Options for the retry helper that can re-initialize the session
 * automatically: everything from `RetryOptions` plus the 403 handling below.
 */
export interface RetryWithReinitOptions extends RetryOptions {
  /** Async callback that re-initializes the session after an HTTP 403 failure. */
  onReinit?: () => Promise<void>;
  /** Whether an HTTP 403 response is treated as retryable. Default: true. */
  reinitOn403?: boolean;
}
/**
 * `withRetry` variant that additionally treats HTTP 403 as a signal that the
 * session has expired: the failure is retried and `onReinit` is run first.
 *
 * Retry decision for a failed attempt:
 *  1. HTTP 403 with `reinitOn403` enabled (default) -> retry;
 *  2. otherwise the caller's `shouldRetry`, when one was supplied;
 *  3. otherwise the built-in check (error codes, HTTP 5xx, HTTP 429).
 *
 * The re-initialization is started from the `onRetry` hook handed to
 * `withRetry`; it is guaranteed to finish before the next attempt only when
 * `withRetry` awaits that hook.
 *
 * @param operation Async action to execute; called once per attempt.
 * @param options   Retry policy plus `reinitOn403` / `onReinit`.
 * @returns The value `operation` resolves with.
 * @throws Whatever `withRetry` throws; a rejection of `onReinit` or of the
 *         caller's `onRetry` surfaces through the hook's promise.
 */
export async function withRetryAndReinit<T>(
  operation: () => Promise<T>,
  options: Partial<RetryWithReinitOptions> = {}
): Promise<T> {
  const { reinitOn403 = true, onReinit, ...retryOptions } = options;
  const callerShouldRetry = retryOptions.shouldRetry;
  const callerOnRetry = retryOptions.onRetry;
  const retryableErrors = retryOptions.retryableErrors ?? DEFAULT_RETRYABLE_ERRORS;

  // Null-safe: the rejection value is not guaranteed to be an object.
  const isForbidden = (error: any): boolean => error?.response?.status === 403;

  return withRetry(operation, {
    ...retryOptions,
    shouldRetry: (error: any) => {
      // 403 Forbidden - the session has to be re-initialized, so retry.
      if (reinitOn403 && isForbidden(error)) {
        return true;
      }
      // Honour the caller's predicate instead of silently discarding it.
      if (callerShouldRetry) {
        return callerShouldRetry(error);
      }
      // Other retryable errors.
      return isErrorRetryable(error, retryableErrors);
    },
    onRetry: async (error: any, attempt: number, delay: number) => {
      // On 403, run the re-initialization callback when one is configured.
      if (reinitOn403 && isForbidden(error) && onReinit) {
        Logger.warn('Получен 403 Forbidden. Переинициализация сессии...');
        await onReinit();
        Logger.info('✅ Сессия переинициализирована');
      }
      // Forward to the caller's hook; awaited in case it is asynchronous.
      if (callerOnRetry) {
        await callerOnRetry(error, attempt, delay);
      }
    }
  });
}