refactor: reorganize scripts - move debug code to experiments/

- Move debug/test scripts from src/scripts/ to experiments/
- Remove test-detail-endpoint from package.json
- Delete temp-product-page.html
- Move E2E_GUIDE.md to docs/
- Add experiments/README.md with documentation
- Keep only production scripts in src/scripts/
- Clean up tsconfig.json exclude list (experiments are now outside src/)

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-01-22 01:55:20 +05:00
parent 3299cca574
commit dd4c64c601
10 changed files with 675 additions and 1 deletions

View File

@@ -0,0 +1,116 @@
import 'dotenv/config';
import { chromium } from 'playwright';
import * as fs from 'fs';
import { Logger } from '../utils/logger.js';
/**
 * Debug script: opens a single hard-coded Magnit product page in headless
 * Chromium, probes the page for product attributes (brand, description,
 * weight) in several places, logs what it finds and saves the page HTML to
 * `temp-product-page.html` in the current working directory.
 *
 * The browser is always closed, even when navigation or extraction fails.
 */
async function main(): Promise<void> {
  Logger.info('=== Извлечение данных о товаре из HTML ===\n');

  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    const productUrl = 'https://magnit.ru/product/1000233138-podguzniki_la_fresh_dlya_vzroslykh_l_10sht?shopCode=992301&shopType=6';

    Logger.info(`Загружаю страницу: ${productUrl}`);
    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 20000,
    });
    // Fixed pause after DOMContentLoaded, presumably to let client-side
    // rendering finish before the DOM is inspected.
    await page.waitForTimeout(3000);

    // Extract data from the HTML. The callback is handed to page.evaluate, so
    // it only touches browser globals (document, window) and nothing from the
    // enclosing Node scope.
    const productData = await page.evaluate(() => {
      const result: Record<string, unknown> = {
        title: document.querySelector('h1')?.textContent?.trim() ?? '',
      };

      // 1. Look in <meta itemprop="..."> tags.
      const metaBrand = document.querySelector<HTMLMetaElement>('meta[itemprop="brand"]')?.content;
      const metaDesc = document.querySelector<HTMLMetaElement>('meta[itemprop="description"]')?.content;
      const metaWeight = document.querySelector<HTMLMetaElement>('meta[itemprop="weight"]')?.content;

      // 2. Look in JSON-LD structured data; keep the first Product entry.
      const jsonLdScripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));
      for (const script of jsonLdScripts) {
        try {
          const json = JSON.parse(script.textContent ?? '');
          if (json['@type'] === 'Product' || json.name === 'Product') {
            result.jsonLd = json;
            break;
          }
        } catch {
          // Not valid JSON — this is a best-effort probe, try the next script.
        }
      }

      // 3. Look in window-level state (Nuxt payload).
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- shape of __NUXT__ is unknown
      const nuxtData = (window as any).__NUXT__;
      if (nuxtData) {
        result.nuxtKeys = Object.keys(nuxtData);
        // Report the first top-level key whose serialized value mentions any
        // of the attributes we are looking for.
        for (const key of Object.keys(nuxtData)) {
          const val = nuxtData[key];
          if (val && typeof val === 'object') {
            const str = JSON.stringify(val);
            if (str.includes('brand') || str.includes('description') || str.includes('weight')) {
              result.nuxtDataKey = key;
              result.nuxtDataPreview = str.substring(0, 500);
              break;
            }
          }
        }
      }

      // 4. Look in the remaining <script> tags.
      const allScripts = Array.from(document.querySelectorAll('script'));
      for (const script of allScripts) {
        const text = script.textContent ?? '';
        if (text.includes('"brand"') && text.length > 100 && text.length < 100000) {
          // Greedy match from the first "{" to the last "}" and try to parse
          // that span as a single JSON document.
          const match = text.match(/\{[\s\S]*\}/);
          if (match) {
            try {
              const json = JSON.parse(match[0]);
              if (json.brand || json.description || json.weight) {
                result.foundInScript = true;
                result.scriptDataPreview = JSON.stringify(json).substring(0, 500);
                break;
              }
            } catch {
              // The matched span is not valid JSON — move on to the next script.
            }
          }
        }
      }

      // 5. Look in data attributes / product-like ids.
      const productElement = document.querySelector('[data-product-id], [data-product], [id*="product"]');
      if (productElement) {
        result.productElement = productElement.outerHTML.substring(0, 500);
      }

      // 6. Report what the meta tags contained (undefined when absent).
      result.structuredData = {
        metaBrand,
        metaDesc,
        metaWeight,
      };

      return result;
    });

    Logger.info('=== РЕЗУЛЬТАТЫ ===\n');
    Logger.info(JSON.stringify(productData, null, 2));

    // Also save the HTML for offline analysis.
    const html = await page.content();
    const outputPath = 'temp-product-page.html';
    fs.writeFileSync(outputPath, html, 'utf-8');
    Logger.info(`\nHTML сохранен в: ${outputPath}`);
  } finally {
    // Release the Chromium process on both success and failure.
    await browser.close();
  }
}
// Entry point: run the script and handle a rejection explicitly instead of
// leaving the promise floating; a failure is reported and the process exits
// with a non-zero code.
main().catch((error: unknown) => {
  console.error(error);
  process.exitCode = 1;
});