refactor: reorganize scripts - move debug code to experiments/

- Move debug/test scripts from src/scripts/ to experiments/
- Remove test-detail-endpoint from package.json
- Delete temp-product-page.html
- Move E2E_GUIDE.md to docs/
- Add experiments/README.md with documentation
- Keep only production scripts in src/scripts/
- Clean up tsconfig.json exclude list (experiments are now outside src/)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-01-22 01:55:20 +05:00
parent 3299cca574
commit dd4c64c601
10 changed files with 675 additions and 1 deletions

42
experiments/README.md Normal file
View File

@@ -0,0 +1,42 @@
# Experiments Directory
This directory contains archived debug and experimental scripts used during development.
## Structure
### `magnit-detail-endpoints/`
Scripts used to discover and test Magnit API endpoints for product details:
- `debug-detail-response.ts` - Debug API response structure and field parsing
- `test-detail-endpoint.ts` - Test specific detail endpoints using MagnitApiScraper
- `test-all-detail-endpoints.ts` - Test multiple endpoints for product details
- `test-object-reviews-endpoint.ts` - Test user-reviews-and-object-info endpoint
- `find-product-detail-api.ts` - Find correct API endpoints for product details
- `find-product-detail-endpoint-v1.ts` - Find v1 API endpoints for product details
**Purpose**: These scripts were used to reverse-engineer the Magnit product detail API during the enrichment feature development.
### `html-extraction/`
Scripts for web scraping fallback exploration:
- `extract-product-from-html.ts` - Extract product data from HTML for future Playwright-based web scraping
**Purpose**: Experimental code for planned web scraping functionality (Phase 5).
## Usage
These scripts are **not production code** and are kept for reference. They may:
- Use DOM APIs (`document`, `window`) that require browser context
- Have hard-coded test data
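- Be one-off experiments that are no longer maintained
- Import project modules through relative paths (e.g. `../utils/logger.js`) that were written for their original location in `src/scripts/` and may need adjusting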
To run these scripts, you may need to adjust `tsconfig.json` to include DOM types:
```json
{
"compilerOptions": {
"lib": ["ES2023", "dom"]
}
}
```
## Status
Archived - No longer actively maintained. Kept for historical reference and potential future use.

View File

@@ -0,0 +1,116 @@
import 'dotenv/config';
import { chromium } from 'playwright';
import * as fs from 'fs';
import { Logger } from '../utils/logger.js';
/**
 * Opens one hard-coded Magnit product page in headless Chromium and probes the
 * rendered DOM for brand / description / weight data in several places
 * (meta tags, JSON-LD, `window.__NUXT__`, inline scripts, data attributes).
 * The findings are logged and the page HTML is written to
 * `temp-product-page.html` for offline analysis.
 */
async function main() {
  Logger.info('=== Извлечение данных о товаре из HTML ===\n');

  const browser = await chromium.launch({ headless: true });

  // try/finally: the headless browser must be closed even when navigation,
  // evaluation or the file write fails, otherwise the process is left hanging.
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    const productUrl = 'https://magnit.ru/product/1000233138-podguzniki_la_fresh_dlya_vzroslykh_l_10sht?shopCode=992301&shopType=6';

    Logger.info(`Загружаю страницу: ${productUrl}`);
    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 20000,
    });

    // Fixed delay to give client-side rendering time to finish.
    await page.waitForTimeout(3000);

    // Extract data from the HTML. The callback runs inside the page, so only
    // browser globals are available in it.
    const productData = await page.evaluate(() => {
      const result: any = {
        title: document.querySelector('h1')?.textContent?.trim() || '',
      };

      // 1. Look in <meta itemprop="..."> tags. The element type is given
      //    explicitly because `content` does not exist on a plain Element.
      const metaBrand = document.querySelector<HTMLMetaElement>('meta[itemprop="brand"]')?.content;
      const metaDesc = document.querySelector<HTMLMetaElement>('meta[itemprop="description"]')?.content;
      const metaWeight = document.querySelector<HTMLMetaElement>('meta[itemprop="weight"]')?.content;

      // 2. Look in JSON-LD structured data; keep the first Product entry.
      const jsonLdScripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));
      for (const script of jsonLdScripts) {
        try {
          const json = JSON.parse(script.textContent || '');
          if (json['@type'] === 'Product' || json.name === 'Product') {
            result.jsonLd = json;
            break;
          }
        } catch {
          // Best effort: a script that is not valid JSON is simply skipped.
        }
      }

      // 3. Look in window-level objects (Nuxt state).
      const nuxtData = (window as any).__NUXT__;
      if (nuxtData) {
        result.nuxtKeys = Object.keys(nuxtData);

        // Record the first top-level key whose serialized value mentions a product field.
        for (const key of Object.keys(nuxtData)) {
          const val = nuxtData[key];
          if (val && typeof val === 'object') {
            const str = JSON.stringify(val);
            if (str.includes('brand') || str.includes('description') || str.includes('weight')) {
              result.nuxtDataKey = key;
              result.nuxtDataPreview = str.substring(0, 500);
              break;
            }
          }
        }
      }

      // 4. Look in the remaining <script> tags for an embedded JSON object.
      const allScripts = Array.from(document.querySelectorAll('script'));
      for (const script of allScripts) {
        const text = script.textContent || '';
        if (!text.includes('"brand"') || text.length <= 100 || text.length >= 100000) {
          continue;
        }

        // Greedy match from the first "{" to the last "}" in the script text.
        const match = text.match(/\{[\s\S]*\}/);
        if (!match) {
          continue;
        }

        try {
          const json = JSON.parse(match[0]);
          if (json.brand || json.description || json.weight) {
            result.foundInScript = true;
            result.scriptDataPreview = JSON.stringify(json).substring(0, 500);
            break;
          }
        } catch {
          // Best effort: the matched span is usually JS, not JSON - skip it.
        }
      }

      // 5. Look for an element carrying product data attributes.
      const productElement = document.querySelector('[data-product-id], [data-product], [id*="product"]');
      if (productElement) {
        result.productElement = productElement.outerHTML.substring(0, 500);
      }

      // 6. Report what the meta tags contained.
      result.structuredData = {
        metaBrand,
        metaDesc,
        metaWeight,
      };

      return result;
    });

    Logger.info('=== РЕЗУЛЬТАТЫ ===\n');
    Logger.info(JSON.stringify(productData, null, 2));

    // Also save the HTML for analysis.
    const html = await page.content();
    const outputPath = 'temp-product-page.html';
    fs.writeFileSync(outputPath, html, 'utf-8');
    Logger.info(`\nHTML сохранен в: ${outputPath}`);
  } finally {
    await browser.close();
  }
}

// Handle rejections explicitly instead of leaving a floating promise.
main().catch((error) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,81 @@
import 'dotenv/config';
import { chromium } from 'playwright';
import axios from 'axios';
import { Logger } from '../utils/logger.js';
/**
 * Opens magnit.ru in headless Chromium and returns the cookies set for the
 * browser context. The browser is always closed, even when navigation fails.
 */
async function collectSessionCookies() {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });
    return await context.cookies();
  } finally {
    await browser.close();
  }
}

/**
 * Debug helper: requests the goods endpoint for one hard-coded product,
 * prints the raw response and annotates which entries of the `details` array
 * look like brand / description / weight / unit fields.
 */
async function main() {
  Logger.info('=== Debug: Смотрим что возвращает API ===\n');

  const cookies = await collectSessionCookies();
  const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
  // The `mg_udi` cookie value is sent as the device id header.
  const deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';

  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  // Test against a specific product taken from the logs.
  const productId = '1000201813'; // Durex condoms - this one had a brand
  const endpoint = `/webgate/v2/goods/${productId}/stores/992301?storetype=2&catalogtype=1`;

  Logger.info(`Запрос: ${endpoint}`);

  try {
    const response = await httpClient.get(endpoint);
    Logger.info(`Status: ${response.status}\n`);

    const data = response.data;
    console.log(JSON.stringify(data, null, 2));

    // Analyse what the response contains.
    if (data.details && data.details.length > 0) {
      Logger.info(`\n=== АНАЛИЗ details массива (${data.details.length} элементов) ===\n`);

      // Only the first 15 entries are inspected.
      const shown = Math.min(data.details.length, 15);
      for (let i = 0; i < shown; i++) {
        const detail = data.details[i];
        Logger.info(`${i + 1}. name: "${detail.name}" | value: "${detail.value}"`);

        // Check how the name would be classified. A missing or non-string
        // name is treated as empty instead of throwing on `.toLowerCase()`.
        const name = String(detail.name ?? '').toLowerCase();
        if (name.includes('бренд') || name === 'brand') {
          Logger.info(`   → Это БРЕНД!`);
        } else if (name.includes('описание') || name === 'description') {
          Logger.info(`   → Это ОПИСАНИЕ!`);
        } else if (name.includes('вес') || name.includes('weight')) {
          Logger.info(`   → Это ВЕС!`);
        } else if (name.includes('единица') || name.includes('unit')) {
          Logger.info(`   → Это ЕДИНИЦА!`);
        }
      }
    }

    if (data.categories && data.categories.length > 0) {
      Logger.info(`\nCategories: ${data.categories.join(', ')}`);
    }
  } catch (error: unknown) {
    Logger.error('Ошибка:', error instanceof Error ? error.message : String(error));
  }
}

// Handle rejections explicitly (e.g. the cookie bootstrap failing).
main().catch((error) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,149 @@
import 'dotenv/config';
import { chromium } from 'playwright';
import axios from 'axios';
import { Logger } from '../utils/logger.js';
/**
 * Opens magnit.ru in headless Chromium and returns the cookies set for the
 * browser context. The browser is always closed, even when navigation fails.
 */
async function collectSessionCookies() {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });
    return await context.cookies();
  } finally {
    await browser.close();
  }
}

/**
 * Method 1: tries a list of candidate product-detail endpoints with direct
 * GET requests, reusing cookies obtained through Playwright. Stops at the
 * first endpoint that answers without an error.
 */
async function findDetailApiViaDirectRequest() {
  Logger.info('=== МЕТОД 1: Прямой GET запрос к API ===\n');

  const productId = '1000233138';
  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';

  // First obtain cookies through Playwright.
  const cookies = await collectSessionCookies();
  const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
  // The `mg_udi` cookie value is sent as the device id header.
  const deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';

  // Now try the different endpoints.
  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  const endpoints = [
    `/webgate/v2/goods/${productId}?storeCode=${storeCode}&storeType=6`,
    `/webgate/v2/goods/${productId}?shopCode=${storeCode}&shopType=6`,
    `/webgate/v2/products/${productId}?storeCode=${storeCode}`,
    `/webgate/v2/catalog/product/${productId}?storeCode=${storeCode}`,
  ];

  for (const endpoint of endpoints) {
    try {
      Logger.info(`Пробую: GET ${endpoint}`);
      const response = await httpClient.get(endpoint);

      Logger.info(`✅ Status: ${response.status}`);
      if (response.data) {
        // Short responses are printed in full, long ones as a 500-char preview.
        const json = JSON.stringify(response.data);
        if (json.length < 2000) {
          Logger.info(`Response: ${json}`);
        } else {
          Logger.info(`Response (preview): ${json.substring(0, 500)}...`);
        }
      }
      break; // Success - stop probing.
    } catch (error: any) {
      // `response.status` is read off the thrown error when present;
      // anything else is reported by its message.
      const status = error.response?.status;
      if (status === 404) {
        Logger.info(`  ❌ 404 Not Found`);
      } else if (status === 403) {
        Logger.info(`  ❌ 403 Forbidden`);
      } else {
        Logger.info(`${error.message}`);
      }
    }
  }
}
/**
 * Method 2: loads one hard-coded product page in headless Chromium and looks
 * for product data embedded in the server-rendered HTML (`window.__NUXT__` or
 * `application/json` script tags). Logs what was found plus how many times
 * the `"brand"`, `"description"` and `"weight"` keys occur in the HTML.
 * Errors are logged rather than rethrown; the browser is always closed.
 */
async function extractFromSSR() {
  Logger.info('\n=== МЕТОД 2: Извлечение данных из SSR HTML ===\n');

  const browser = await chromium.launch({ headless: true });

  const productUrl = 'https://magnit.ru/product/1000233138-podguzniki_la_fresh_dlya_vzroslykh_l_10sht?shopCode=992301&shopType=6';

  Logger.info(`Загружаю страницу (без networkidle): ${productUrl}`);

  try {
    // Context and page are created inside the try so that a failure here
    // still reaches the `finally` that closes the browser.
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 15000,
    });

    // Wait a little for rendering.
    await page.waitForTimeout(2000);

    // Inspect the data in the HTML. The callback runs inside the page.
    const productData = await page.evaluate(() => {
      // Look for data in window.__NUXT__.
      if ((window as any).__NUXT__) {
        const nuxtData = (window as any).__NUXT__;
        return {
          source: '__NUXT__',
          keys: Object.keys(nuxtData),
          // Candidate keys that may hold the product data.
          data: nuxtData.data || nuxtData.pinia || null,
        };
      }

      // Look for JSON in script tags.
      const scripts = Array.from(document.querySelectorAll('script[type="application/json"]'));
      for (const script of scripts) {
        try {
          const json = JSON.parse(script.textContent || '');
          if (json.product || json.goods || json.data?.product) {
            return { source: 'application/json script', data: json };
          }
        } catch {
          // Best effort: a script that is not valid JSON is simply skipped.
        }
      }

      return { found: false };
    });

    if (productData.source) {
      Logger.info(`✅ Данные найдены в: ${productData.source}`);
      Logger.info(`Ключи: ${JSON.stringify(productData.keys || Object.keys(productData.data || {}))}`);
      if (productData.data) {
        Logger.info(`Данные (превью): ${JSON.stringify(productData.data).substring(0, 1000)}...`);
      }
    } else {
      Logger.info(`❌ Данные не найдены`);
    }

    // Also report basic statistics about the HTML.
    const html = await page.content();
    Logger.info(`\nHTML размер: ${html.length} символов`);
    // The messages read "... N раз" (N times), so report an occurrence count
    // rather than the boolean that `String.includes` returns.
    Logger.info(`HTML содержит "brand": ${html.split('"brand"').length - 1} раз`);
    Logger.info(`HTML содержит "description": ${html.split('"description"').length - 1} раз`);
    Logger.info(`HTML содержит "weight": ${html.split('"weight"').length - 1} раз`);
  } catch (error) {
    Logger.error(`Ошибка: ${error}`);
  } finally {
    await browser.close();
  }
}
/**
 * Runs both discovery methods in sequence: direct API requests first, then
 * extraction from the server-rendered product page.
 */
async function main() {
  await findDetailApiViaDirectRequest();
  await extractFromSSR();
}

// Handle rejections explicitly: the first method does not catch failures of
// its Playwright cookie bootstrap, so the returned promise can reject.
main().catch((error) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,93 @@
import 'dotenv/config';
import { chromium } from 'playwright';
import axios from 'axios';
import { Logger } from '../utils/logger.js';
/**
 * Opens magnit.ru in headless Chromium and returns the cookies set for the
 * browser context. The browser is always closed, even when navigation fails.
 */
async function collectSessionCookies() {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });
    return await context.cookies();
  } finally {
    await browser.close();
  }
}

/**
 * Probes a list of candidate v1 endpoints for product details and stops at
 * the first 200 response whose body exposes `brand`, `description`, `weight`
 * or `unit` at the top level.
 */
async function main() {
  Logger.info('=== Поиск endpoint для ДЕТАЛЕЙ товара ===\n');

  // Obtain cookies.
  const cookies = await collectSessionCookies();
  const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
  // The `mg_udi` cookie value is sent as the device id header.
  const deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';

  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  const productId = '1000233138';

  // Various possible endpoints for product details.
  const endpoints = [
    // v1 API (the user found reviews through v1)
    `/webgate/v1/goods/${productId}?storeCode=992301&storeType=6`,
    `/webgate/v1/products/${productId}?storeCode=992301`,
    `/webgate/v1/catalog/product/${productId}?storeCode=992301`,
    `/webgate/v1/listing/goods/${productId}?storeCode=992301`,
    `/webgate/v1/listing/product/${productId}?storeCode=992301`,
    // Other variants
    `/webgate/v1/products/detail/${productId}?storeCode=992301`,
    `/webgate/v1/object?productId=${productId}&storeCode=992301`,
    `/webgate/v1/item/${productId}?storeCode=992301`,
  ];

  for (const endpoint of endpoints) {
    try {
      Logger.info(`Пробую: ${endpoint}`);
      const response = await httpClient.get(endpoint);

      if (response.status !== 200) {
        continue;
      }

      Logger.info(`✅ Status: ${response.status}`);

      const data = response.data;
      const json = JSON.stringify(data) ?? '';
      const isSmall = json.length < 3000;

      // Small responses are printed in full.
      if (isSmall) {
        Logger.info(`Response:\n${json}`);
      }

      // Check for useful fields regardless of response size. Previously the
      // check ran only for large responses, so a small body that did contain
      // product details was never reported as a hit.
      const hasDetails = Boolean(data?.brand || data?.description || data?.weight || data?.unit);

      if (hasDetails) {
        const description = typeof data.description === 'string'
          ? data.description.substring(0, 100)
          : data.description;

        Logger.info(`=== НАЙДЕНЫ ДЕТАЛИ ТОВАРА ===`);
        Logger.info(`brand: ${data.brand}`);
        Logger.info(`description: ${description}`);
        Logger.info(`weight: ${data.weight}`);
        Logger.info(`unit: ${data.unit}`);
        break;
      }

      if (!isSmall) {
        Logger.info(`Response без деталей товара (preview):`);
        Logger.info(json.substring(0, 500) + '...');
      }
    } catch (error: any) {
      // `response.status` is read off the thrown error when present;
      // anything else is reported by its message.
      const status = error.response?.status;
      if (status === 404) {
        Logger.info(`  ❌ 404 Not Found`);
      } else if (status === 403) {
        Logger.info(`  ❌ 403 Forbidden`);
      } else {
        Logger.info(`${error.message}`);
      }
    }
  }
}

// Handle rejections explicitly (e.g. the cookie bootstrap failing).
main().catch((error) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,84 @@
import 'dotenv/config';
import { chromium } from 'playwright';
import axios from 'axios';
import { Logger } from '../utils/logger.js';
/**
 * Opens magnit.ru in headless Chromium and returns the cookies set for the
 * browser context. The browser is always closed, even when navigation fails.
 */
async function collectSessionCookies() {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });
    return await context.cookies();
  } finally {
    await browser.close();
  }
}

/**
 * Requests every endpoint in a fixed list, prints each response (in full when
 * short, as a preview otherwise) and flags responses that expose product
 * detail fields at the top level, under `objectInfo` or under `product`.
 */
async function main() {
  Logger.info('=== Тестирование всех endpoints для деталей товара ===\n');

  // Obtain cookies.
  const cookies = await collectSessionCookies();
  const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
  // The `mg_udi` cookie value is sent as the device id header.
  const deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';

  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  const endpoints = [
    {
      name: '🔍 user-reviews-and-object-info (ДЕТАЛИ ТОВАРА)',
      url: '/webgate/v1/listing/user-reviews-and-object-info?service=dostavka&objectType=product&objectId=1000530495',
    },
    {
      name: '🏷️ promotions/type',
      url: '/webgate/v1/promotions/type?adult=true&type=19&limit=10&storeCode=996609',
    },
    {
      name: '🏪 goods/{id}/stores/{storeCode}',
      url: '/webgate/v2/goods/1000530495/stores/996609?storetype=2&catalogtype=1',
    },
  ];

  for (const { name, url } of endpoints) {
    try {
      Logger.info(`\n${name}`);
      Logger.info(`URL: ${url}`);

      const response = await httpClient.get(url);
      Logger.info(`✅ Status: ${response.status}`);

      const data = response.data;
      const json = JSON.stringify(data, null, 2) ?? '';

      // Short responses are printed in full, long ones as an 800-char preview.
      if (json.length < 2000) {
        Logger.info(`Response:\n${json}`);
      } else {
        Logger.info(`Response (превью):`);
        console.log(json.substring(0, 800) + '...');
      }

      // Check for the key fields. Optional chaining on `data` keeps a null
      // body from being misreported as a request error by the catch below.
      const hasDetails = data?.brand || data?.description || data?.weight || data?.unit ||
                         data?.objectInfo?.brand || data?.objectInfo?.description ||
                         data?.product?.brand || data?.product?.description;

      if (hasDetails) {
        Logger.info(`\n⭐ НАЙДЕНЫ ДЕТАЛИ ТОВАРА! ⭐️⭐️`);
      }
    } catch (error: any) {
      Logger.info(`❌ Error: ${error.response?.status || error.message}`);
    }
  }
}

// Handle rejections explicitly (e.g. the cookie bootstrap failing).
main().catch((error) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,55 @@
import 'dotenv/config';
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
import { Logger } from '../utils/logger.js';
/**
 * Initializes a `MagnitApiScraper` from environment settings and uses its
 * HTTP client to try several candidate v2 endpoints for product details,
 * stopping at the first one that answers without an error.
 * On an unexpected failure the exit code is set to 1; the scraper is closed
 * in every case.
 */
async function main() {
  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';
  const productId = '1000233138'; // Taken from the inspect script

  const scraper = new MagnitApiScraper({
    storeCode,
    storeType: process.env.MAGNIT_STORE_TYPE || '6',
    catalogType: process.env.MAGNIT_CATALOG_TYPE || '1',
    headless: process.env.MAGNIT_HEADLESS !== 'false',
  });

  try {
    await scraper.initialize();

    Logger.info(`Попытка получить детали товара ${productId}...\n`);

    // Try the different possible endpoints for product details.
    const endpoints = [
      `/webgate/v2/goods/${productId}`,
      `/webgate/v2/products/${productId}`,
      `/webgate/v2/catalog/product/${productId}`,
      `/webgate/v2/goods/detail/${productId}`,
    ];

    for (const endpoint of endpoints) {
      try {
        Logger.info(`Пробую: ${endpoint}`);
        // The cast to `any` reaches the scraper's `httpClient`, which its
        // public type evidently does not expose.
        const response = await (scraper as any).httpClient.get(endpoint);
        Logger.info(`✅ Успех! Status: ${response.status}`);
        Logger.info(JSON.stringify(response.data, null, 2));
        break; // Success - leave the loop.
      } catch (error: any) {
        const status = error.response?.status;
        if (status === 404) {
          Logger.info(`  ❌ 404 Not Found`);
        } else if (status === 403) {
          Logger.info(`  ❌ 403 Forbidden (нужна аутентификация)`);
        } else {
          Logger.info(`${status || error.message}`);
        }
      }
    }
  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Set the exit code instead of calling process.exit(1): exiting here would
    // terminate the process before the `finally` block closes the scraper.
    process.exitCode = 1;
  } finally {
    await scraper.close();
  }
}

// Handle rejections explicitly: `scraper.close()` in the finally block can
// still reject after the catch above has run.
main().catch((error) => {
  Logger.error('❌ Ошибка:', error);
  process.exitCode = 1;
});

View File

@@ -0,0 +1,55 @@
import 'dotenv/config';
import { chromium } from 'playwright';
import axios from 'axios';
import { Logger } from '../utils/logger.js';
/**
 * Opens magnit.ru in headless Chromium and returns the cookies set for the
 * browser context. The browser is always closed, even when navigation fails.
 */
async function collectSessionCookies() {
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });
    return await context.cookies();
  } finally {
    await browser.close();
  }
}

/**
 * Requests the `object-reviews` listing endpoint for one hard-coded product
 * and prints the raw JSON response, or the error status and body on failure.
 */
async function main() {
  Logger.info('=== Тестирование API endpoint для деталей товара ===\n');

  // Obtain cookies through Playwright.
  const cookies = await collectSessionCookies();
  const cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
  // The `mg_udi` cookie value is sent as the device id header.
  const deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';

  // Create the HTTP client.
  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  // Try the endpoint that the user found.
  const endpoint = '/webgate/v1/listing/object-reviews?service=dostavka&objectId=1000530495&objectType=product&page=0&size=10';

  try {
    Logger.info(`Запрос: ${endpoint}`);
    const response = await httpClient.get(endpoint);

    Logger.info(`✅ Status: ${response.status}`);
    Logger.info(`\n=== ОТВЕТ API ===`);
    console.log(JSON.stringify(response.data, null, 2));
  } catch (error: any) {
    Logger.error(`Ошибка: ${error.message}`);
    // When the thrown error carries a `response`, also print its status and body.
    if (error.response) {
      Logger.error(`Status: ${error.response.status}`);
      Logger.error(`Data:`, error.response.data);
    }
  }
}

// Handle rejections explicitly (e.g. the cookie bootstrap failing).
main().catch((error) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});

View File

@@ -10,7 +10,6 @@
"dev": "tsx src/scripts/scrape-magnit-products.ts", "dev": "tsx src/scripts/scrape-magnit-products.ts",
"enrich": "tsx src/scripts/enrich-product-details.ts", "enrich": "tsx src/scripts/enrich-product-details.ts",
"test-db": "tsx src/scripts/test-db-connection.ts", "test-db": "tsx src/scripts/test-db-connection.ts",
"test-detail-endpoint": "tsx src/scripts/test-all-detail-endpoints.ts",
"prisma:generate": "prisma generate", "prisma:generate": "prisma generate",
"prisma:migrate": "prisma migrate dev", "prisma:migrate": "prisma migrate dev",
"prisma:studio": "prisma studio --config=prisma.config.ts", "prisma:studio": "prisma studio --config=prisma.config.ts",