From 5a763a4e13830a4787badee13e4cb96b88ec571a Mon Sep 17 00:00:00 2001 From: Mc Smog Date: Wed, 21 Jan 2026 23:29:02 +0500 Subject: [PATCH] feat: add Postgres MCP integration for database testing - Add postgres-mcp service to docker-compose.yml (SSE mode on port 8000) - Add .mcp.json.example with SSE configuration template - Add .gitignore entries for .claude/settings.local.json and .mcp.json - Add MCP_EXAMPLES.md with query examples for testing scraping results - Add analysis scripts: analyze-category-nulls.ts, check-product-details.ts, inspect-api-response.ts Co-Authored-By: Claude --- .claude/settings.local.json | 7 - .gitignore | 3 + .mcp.json.example | 8 + MCP_EXAMPLES.md | 354 ++++++++++++++++++++++++++ docker-compose.yml | 13 + src/scripts/analyze-category-nulls.ts | 97 +++++++ src/scripts/check-product-details.ts | 38 +++ src/scripts/inspect-api-response.ts | 67 +++++ 8 files changed, 580 insertions(+), 7 deletions(-) delete mode 100644 .claude/settings.local.json create mode 100644 .mcp.json.example create mode 100644 MCP_EXAMPLES.md create mode 100644 src/scripts/analyze-category-nulls.ts create mode 100644 src/scripts/check-product-details.ts create mode 100644 src/scripts/inspect-api-response.ts diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index c5ce083..0000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(cat:*)" - ] - } -} diff --git a/.gitignore b/.gitignore index b1aaad2..bcc05e8 100644 --- a/.gitignore +++ b/.gitignore @@ -35,3 +35,6 @@ test-results/ playwright-report/ playwright/.cache/ +# Claude Code +.claude/settings.local.json +.mcp.json diff --git a/.mcp.json.example b/.mcp.json.example new file mode 100644 index 0000000..4e5176d --- /dev/null +++ b/.mcp.json.example @@ -0,0 +1,8 @@ +{ + "mcpServers": { + "postgres-supermarket": { + "type": "sse", + "url": "http://localhost:8000/sse" + } + } +} diff --git a/MCP_EXAMPLES.md 
b/MCP_EXAMPLES.md new file mode 100644 index 0000000..368f75c --- /dev/null +++ b/MCP_EXAMPLES.md @@ -0,0 +1,354 @@ +# MCP Examples for Supermarket Scraper + +This document contains example queries and prompts you can use with the Postgres MCP server to test and analyze your scraping results. + +## Setup + +### 1. Install Docker (if not already installed) +- Windows: [Docker Desktop](https://www.docker.com/products/docker-desktop/) +- macOS: [Docker Desktop for Mac](https://www.docker.com/products/docker-desktop/) +- Linux: `sudo apt-get install docker.io` + +### 2. Pull the Postgres MCP image +```bash +docker pull crystaldba/postgres-mcp +``` + +### 3. Start your database and the MCP server +```bash +docker-compose up -d postgres postgres-mcp +``` + +### 4. Configure Claude Code with MCP + +For Claude Code, copy `.mcp.json.example` to `.mcp.json` in the project root (the file is git-ignored). Alternatively, add the configuration to your Claude config file: + +| OS | Config File Location | +|----|---------------------| +| Windows | `%APPDATA%\Claude\claude_desktop_config.json` | +| macOS | `~/Library/Application Support/Claude/claude_desktop_config.json` | +| Linux | `~/.config/Claude/claude_desktop_config.json` | + +Or via VSCode: `Settings` → `MCP` → `Configuration File` + +--- + +## Natural Language Prompts + +You can ask the AI questions in natural language, and it will use Postgres MCP to query your database: + +### Database Overview +- "What tables exist in the database?" +- "Show me the schema of the Product table" +- "What are the relationships between tables?" +- "Analyze the database health" + +### Scraping Results +- "How many products are in the database?" +- "Show me products with the highest discounts" +- "Find products without categories" +- "What is the price distribution of products?" +- "Which stores have the most products?" + +### Performance +- "Are there any slow queries?" +- "What indexes should I add to improve performance?"
+- "Show me the database health report" + +--- + +## SQL Query Examples + +You can also ask the AI to execute specific SQL queries using the MCP tools. + +### 1. Basic Scraping Validation + +```sql +-- Total products count +SELECT COUNT(*) as total_products FROM "Product"; + +-- Products by store +SELECT s.name, COUNT(p.id) as product_count +FROM "Store" s +LEFT JOIN "Product" p ON s.id = p."storeId" +GROUP BY s.id, s.name; + +-- Latest scraping session +SELECT * FROM "ScrapingSession" +ORDER BY "startedAt" DESC LIMIT 1; + +-- All scraping sessions with status +SELECT + id, + "sourceType", + status, + "startedAt", + "finishedAt", + CASE + WHEN "finishedAt" IS NOT NULL + THEN EXTRACT(EPOCH FROM ("finishedAt" - "startedAt")) + ELSE NULL + END as duration_seconds +FROM "ScrapingSession" +ORDER BY "startedAt" DESC; +``` + +### 2. Category Analysis + +```sql +-- Products without categories +SELECT COUNT(*) FROM "Product" WHERE "categoryId" IS NULL; + +-- Categories by product count +SELECT c.name, COUNT(p.id) as product_count +FROM "Category" c +LEFT JOIN "Product" p ON p."categoryId" = c.id +GROUP BY c.id, c.name +ORDER BY product_count DESC NULLS LAST +LIMIT 20; + +-- Category hierarchy with counts +SELECT + c1.name as category, + c2.name as parent_category, + COUNT(p.id) as product_count +FROM "Category" c1 +LEFT JOIN "Category" c2 ON c1."parentId" = c2.id +LEFT JOIN "Product" p ON p."categoryId" = c1.id +GROUP BY c1.id, c1.name, c2.name +ORDER BY product_count DESC; + +-- Top-level categories (no parent) +SELECT c.name, COUNT(p.id) as product_count +FROM "Category" c +LEFT JOIN "Product" p ON p."categoryId" = c.id +WHERE c."parentId" IS NULL +GROUP BY c.id, c.name +ORDER BY product_count DESC; +``` + +### 3. 
Price and Promotion Analysis + +```sql +-- Products with active discounts +SELECT + name, + "currentPrice", + "oldPrice", + "discountPercent", + "promotionEndDate" +FROM "Product" +WHERE "oldPrice" IS NOT NULL + AND ("promotionEndDate" IS NULL OR "promotionEndDate" > NOW()) +ORDER BY "discountPercent" DESC NULLS LAST +LIMIT 20; + +-- Expired promotions +SELECT + name, + "currentPrice", + "oldPrice", + "discountPercent", + "promotionEndDate" +FROM "Product" +WHERE "oldPrice" IS NOT NULL + AND "promotionEndDate" IS NOT NULL + AND "promotionEndDate" < NOW() +ORDER BY "promotionEndDate" DESC +LIMIT 20; + +-- Price distribution +SELECT + CASE + WHEN "currentPrice" < 100 THEN '0-100' + WHEN "currentPrice" < 500 THEN '100-500' + WHEN "currentPrice" < 1000 THEN '500-1000' + ELSE '1000+' + END as price_range, + COUNT(*) as count +FROM "Product" +GROUP BY price_range +ORDER BY MIN("currentPrice"); + +-- Most expensive products +SELECT name, "currentPrice", brand, unit +FROM "Product" +ORDER BY "currentPrice" DESC +LIMIT 20; + +-- Cheapest products +SELECT name, "currentPrice", brand, unit +FROM "Product" +WHERE "currentPrice" > 0 +ORDER BY "currentPrice" ASC +LIMIT 20; +``` + +### 4.
Data Quality Checks + +```sql +-- Products missing critical fields +SELECT + COUNT(*) FILTER (WHERE name IS NULL OR name = '') as missing_name, + COUNT(*) FILTER (WHERE "categoryId" IS NULL) as missing_category, + COUNT(*) FILTER (WHERE brand IS NULL OR brand = '') as missing_brand, + COUNT(*) FILTER (WHERE "imageUrl" IS NULL OR "imageUrl" = '') as missing_image, + COUNT(*) FILTER (WHERE url IS NULL OR url = '') as missing_url, + COUNT(*) as total_products +FROM "Product"; + +-- Duplicate products check (same externalId for different stores) +SELECT "externalId", COUNT(*) as count +FROM "Product" +GROUP BY "externalId" +HAVING COUNT(*) > 1; + +-- Products with strange prices (0 or negative) +SELECT name, "currentPrice", "oldPrice" +FROM "Product" +WHERE "currentPrice" <= 0 OR ("oldPrice" IS NOT NULL AND "oldPrice" <= 0) +LIMIT 20; + +-- Products with impossible discounts +SELECT name, "currentPrice", "oldPrice", "discountPercent" +FROM "Product" +WHERE "discountPercent" < 0 OR "discountPercent" > 100 +LIMIT 20; +``` + +### 5. Rating Analysis + +```sql +-- Top rated products +SELECT + name, + rating, + "scoresCount", + "commentsCount", + brand +FROM "Product" +WHERE rating IS NOT NULL +ORDER BY rating DESC, "scoresCount" DESC +LIMIT 20; + +-- Most reviewed products +SELECT + name, + rating, + "scoresCount", + "commentsCount", + brand +FROM "Product" +WHERE "commentsCount" IS NOT NULL +ORDER BY "commentsCount" DESC +LIMIT 20; + +-- Products without ratings +SELECT COUNT(*) FROM "Product" WHERE rating IS NULL; +``` + +### 6. 
Brand Analysis + +```sql +-- Top brands by product count +SELECT brand, COUNT(*) as product_count +FROM "Product" +WHERE brand IS NOT NULL AND brand != '' +GROUP BY brand +ORDER BY product_count DESC +LIMIT 20; + +-- Average price by brand (for brands with 10+ products) +SELECT + brand, + COUNT(*) as product_count, + AVG("currentPrice") as avg_price, + MIN("currentPrice") as min_price, + MAX("currentPrice") as max_price +FROM "Product" +WHERE brand IS NOT NULL AND brand != '' +GROUP BY brand +HAVING COUNT(*) >= 10 +ORDER BY product_count DESC +LIMIT 20; +``` + +### 7. Health Check Queries + +```sql +-- Table sizes +SELECT + schemaname, + tablename, + pg_size_pretty(pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass)) AS size +FROM pg_tables +WHERE schemaname = 'public' +ORDER BY pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass) DESC; + +-- Index usage +SELECT + schemaname, + relname AS tablename, + indexrelname AS indexname, + idx_scan, + idx_tup_read, + idx_tup_fetch +FROM pg_stat_user_indexes +ORDER BY idx_scan DESC; + +-- Table row counts +SELECT + 'Store' as table_name, + COUNT(*) as row_count +FROM "Store" +UNION ALL +SELECT 'Category', COUNT(*) FROM "Category" +UNION ALL +SELECT 'Product', COUNT(*) FROM "Product" +UNION ALL +SELECT 'ScrapingSession', COUNT(*) FROM "ScrapingSession"; +``` + +--- + +## MCP Tools Reference + +Postgres MCP provides these tools that the AI can use: + +| Tool | Description | +|------|-------------| +| `list_schemas` | Lists all database schemas | +| `list_objects` | Lists tables, views, sequences in a schema | +| `get_object_details` | Gets table/column details | +| `execute_sql` | Executes SQL queries | +| `explain_query` | Shows query execution plan | +| `get_top_queries` | Reports slowest queries | +| `analyze_workload_indexes` | Recommends indexes for workload | +| `analyze_db_health` | Performs comprehensive health checks | + +--- + +## Example Workflow + +Here's a typical workflow for testing scraping results: + +1.
**Start the database**:
+   ```bash
+   docker-compose up -d postgres postgres-mcp
+   ```
+
+2. **Run the scraper**:
+   ```bash
+   pnpm dev
+   ```
+
+3. **Ask the AI to verify**:
+   - "Check the database health"
+   - "How many products were scraped?"
+   - "Are there any products without categories?"
+   - "Show me the top 20 products by discount"
+   - "Find any data quality issues"
+
+4. **Analyze performance**:
+   - "Are there any slow queries?"
+   - "Should I add any indexes?"
diff --git a/docker-compose.yml b/docker-compose.yml
index c58271f..3c9a208 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -47,6 +47,19 @@ services:
       postgres:
         condition: service_healthy
 
+  postgres-mcp:
+    image: crystaldba/postgres-mcp:latest
+    container_name: supermarket-postgres-mcp
+    restart: unless-stopped
+    environment:
+      DATABASE_URI: postgresql://user:password@postgres:5432/supermarket
+    ports:
+      - "127.0.0.1:8000:8000"
+    command: ["--access-mode=unrestricted", "--transport=sse"]
+    depends_on:
+      postgres:
+        condition: service_healthy
+
 volumes:
   postgres_data:
   pgadmin_data:
diff --git a/src/scripts/analyze-category-nulls.ts b/src/scripts/analyze-category-nulls.ts
new file mode 100644
index 0000000..dc0ea7a
--- /dev/null
+++ b/src/scripts/analyze-category-nulls.ts
@@ -0,0 +1,97 @@
+import 'dotenv/config';
+import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
+import { Logger } from '../utils/logger.js';
+
+/** Formats part/total as a percentage with 2 decimals; "0.00" when total is 0 (avoids "NaN"). */
+function formatShare(part: number, total: number): string {
+  return total > 0 ? ((part / total) * 100).toFixed(2) : '0.00';
+}
+
+/**
+ * Logs category-coverage statistics: product counts with a null vs. non-null `categoryId`,
+ * the number of categories, and up to 5 sample rows of each kind.
+ * On failure the error is logged and the exit code is set to 1.
+ */
+async function main() {
+  try {
+    await connectDatabase();
+
+    // Check total products and null categoryId count
+    const totalProducts = await prisma.product.count();
+    const nullCategoryCount = await prisma.product.count({ where: { categoryId: null } });
+    const withCategoryCount = await prisma.product.count({ where: { categoryId: { not: null } } });
+
+    Logger.info('\n📊 СТАТИСТИКА ПО КАТЕГОРИЯМ:');
+    Logger.info(`Всего товаров: ${totalProducts}`);
+    Logger.info(`Товаров без категории (null): ${nullCategoryCount} (${formatShare(nullCategoryCount, totalProducts)}%)`);
+    Logger.info(`Товаров с категорией: ${withCategoryCount} (${formatShare(withCategoryCount, totalProducts)}%)`);
+
+    // Check total categories
+    const totalCategories = await prisma.category.count();
+    Logger.info(`\nВсего категорий в БД: ${totalCategories}`);
+
+    // Sample categories
+    if (totalCategories > 0) {
+      const sampleCategories = await prisma.category.findMany({
+        take: 5,
+        select: {
+          id: true,
+          externalId: true,
+          name: true,
+          _count: { select: { products: true } }
+        }
+      });
+
+      Logger.info('\n📁 Примеры категорий:');
+      sampleCategories.forEach(cat => {
+        Logger.info(` - [${cat.externalId}] ${cat.name} (товаров: ${cat._count.products})`);
+      });
+    }
+
+    // Sample products without categories
+    const productsWithoutCategory = await prisma.product.findMany({
+      where: { categoryId: null },
+      take: 5,
+      select: {
+        id: true,
+        externalId: true,
+        name: true,
+        currentPrice: true
+      }
+    });
+
+    Logger.info('\n❌ Примеры товаров БЕЗ категории:');
+    productsWithoutCategory.forEach(p => {
+      Logger.info(` - [${p.externalId}] ${p.name} (₽${p.currentPrice})`);
+    });
+
+    // Sample products with categories
+    const productsWithCategory = await prisma.product.findMany({
+      where: { categoryId: { not: null } },
+      take: 5,
+      select: {
+        id: true,
+        externalId: true,
+        name: true,
+        currentPrice: true,
+        category: { select: { externalId: true, name: true } }
+      }
+    });
+
+    if (productsWithCategory.length > 0) {
+      Logger.info('\n✅ Примеры товаров С категорией:');
+      productsWithCategory.forEach(p => {
+        Logger.info(` - [${p.externalId}] ${p.name} → [${p.category?.externalId}] ${p.category?.name}`);
+      });
+    }
+  } catch (error) {
+    Logger.error('❌ Ошибка при анализе:', error);
+    // Set the exit code instead of calling process.exit(): exit() terminates at once
+    // and would skip `finally`, leaving the database connection open.
+    process.exitCode = 1;
+  } finally {
+    await disconnectDatabase();
+  }
+}
+
+main();
diff --git a/src/scripts/check-product-details.ts b/src/scripts/check-product-details.ts
new file mode 100644
index 0000000..7724e23
--- /dev/null
+++
b/src/scripts/check-product-details.ts
@@ -0,0 +1,38 @@
+import 'dotenv/config';
+import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
+import { Logger } from '../utils/logger.js';
+
+/**
+ * Logs the selected detail fields of one product (whichever row `findFirst` returns)
+ * as pretty-printed JSON, or a notice when no product exists.
+ * On failure the error is logged and the exit code is set to 1.
+ */
+async function main() {
+  try {
+    await connectDatabase();
+
+    // Fetch a single sample product, limited to the fields inspected here
+    const product = await prisma.product.findFirst({
+      select: {
+        id: true, externalId: true, name: true, description: true, currentPrice: true,
+        unit: true, weight: true, brand: true, categoryId: true, badges: true,
+      }
+    });
+
+    if (product) {
+      Logger.info('=== ДЕТАЛИ ТОВАРА ИЗ БД ===');
+      Logger.info(JSON.stringify(product, null, 2));
+    } else {
+      Logger.info('Товары в БД не найдены');
+    }
+
+  } catch (error) {
+    Logger.error('❌ Ошибка:', error);
+    // process.exit() here would skip `finally`, so disconnectDatabase() would never run
+    process.exitCode = 1;
+  } finally {
+    await disconnectDatabase();
+  }
+}
+
+main();
diff --git a/src/scripts/inspect-api-response.ts b/src/scripts/inspect-api-response.ts
new file mode 100644
index 0000000..ab7e2f0
--- /dev/null
+++ b/src/scripts/inspect-api-response.ts
@@ -0,0 +1,67 @@
+import 'dotenv/config';
+import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
+import { Logger } from '../utils/logger.js';
+
+/**
+ * Fetches the first 5 goods via MagnitApiScraper and logs the raw first item, per-item
+ * category presence, the response-level category and up to 10 fast categories.
+ * On failure the error is logged and the exit code is set to 1.
+ */
+async function main() {
+  const scraper = new MagnitApiScraper({
+    storeCode: process.env.MAGNIT_STORE_CODE || '992301',
+    storeType: process.env.MAGNIT_STORE_TYPE || '6',
+    catalogType: process.env.MAGNIT_CATALOG_TYPE || '1',
+    headless: process.env.MAGNIT_HEADLESS !== 'false',
+  });
+
+  try {
+    await scraper.initialize();
+
+    Logger.info('Запрос первых 5 товаров для инспекции...\n');
+    const response = await scraper.searchGoods({ limit: 5, offset: 0 }, []);
+    Logger.info(`Получено товаров: ${response.items.length}\n`);
+
+    if (response.items.length > 0) {
+      Logger.info('=== СТРУКТУРА ПЕРВОГО ТОВАРА ===');
+      Logger.info(JSON.stringify(response.items[0], null, 2));
+
+      Logger.info('\n=== ПРОВЕРКА НАЛИЧИЯ КАТЕГОРИЙ ===');
+      response.items.forEach((item, index) => {
+        // Append the ellipsis only when the name was actually truncated
+        const shortName = item.name.length > 50 ? `${item.name.substring(0, 50)}...` : item.name;
+        Logger.info(`${index + 1}. [${item.id}] ${shortName}`);
+        if (item.category) {
+          Logger.info(` ✅ Категория: [${item.category.id}] ${item.category.title}`);
+        } else {
+          Logger.info(` ❌ Категория отсутствует (undefined)`);
+        }
+      });
+
+      Logger.info('\n=== ОТВЕТ API (response.category) ===');
+      if (response.category) {
+        Logger.info(`Категория уровня ответа: [${response.category.id}] ${response.category.title}`);
+      } else {
+        Logger.info('Категория уровня ответа отсутствует');
+      }
+
+      Logger.info('\n=== БЫСТРЫЕ КАТЕГОРИИ (fastCategoriesExtended) ===');
+      if (response.fastCategoriesExtended && response.fastCategoriesExtended.length > 0) {
+        Logger.info(`Найдено ${response.fastCategoriesExtended.length} быстрых категорий:`);
+        response.fastCategoriesExtended.slice(0, 10).forEach(cat => {
+          Logger.info(` - [${cat.id}] ${cat.title}`);
+        });
+      } else {
+        Logger.info('Быстрые категории отсутствуют');
+      }
+    }
+  } catch (error) {
+    Logger.error('❌ Ошибка:', error);
+    // process.exit() here would skip `finally`, so scraper.close() would never run
+    process.exitCode = 1;
+  } finally {
+    await scraper.close();
+  }
+}
+
+main();