feat: add Postgres MCP integration for database testing

- Add postgres-mcp service to docker-compose.yml (SSE mode on port 8000)
- Add .mcp.json.example with SSE configuration template
- Add .gitignore entries for .claude/settings.local.json and .mcp.json
- Add MCP_EXAMPLES.md with query examples for testing scraping results
- Add analysis scripts: analyze-category-nulls.ts, check-product-details.ts,
  inspect-api-response.ts

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
2026-01-21 23:29:02 +05:00
parent 6ba22469c7
commit 5a763a4e13
8 changed files with 580 additions and 7 deletions

View File

@@ -1,7 +0,0 @@
{
"permissions": {
"allow": [
"Bash(cat:*)"
]
}
}

3
.gitignore vendored
View File

@@ -35,3 +35,6 @@ test-results/
playwright-report/ playwright-report/
playwright/.cache/ playwright/.cache/
# Claude Code
.claude/settings.local.json
.mcp.json

8
.mcp.json.example Normal file
View File

@@ -0,0 +1,8 @@
{
"mcpServers": {
"postgres-supermarket": {
"type": "sse",
"url": "http://localhost:8000/sse"
}
}
}

354
MCP_EXAMPLES.md Normal file
View File

@@ -0,0 +1,354 @@
# MCP Examples for Supermarket Scraper
This document contains example queries and prompts you can use with the Postgres MCP server to test and analyze your scraping results.
## Setup
### 1. Install Docker (if not already installed)
- Windows: [Docker Desktop](https://www.docker.com/products/docker-desktop/)
- macOS: [Docker Desktop for Mac](https://www.docker.com/products/docker-desktop/)
- Linux: `sudo apt-get install docker.io`
### 2. Pull the Postgres MCP image
```bash
docker pull crystaldba/postgres-mcp
```
### 3. Start your database and the Postgres MCP server
```bash
docker-compose up -d postgres postgres-mcp
```
### 4. Configure Claude Code with MCP
Copy `.mcp.json.example` to `.mcp.json` in the project root (the file is git-ignored), or add its contents to your Claude config:
| OS | Config File Location |
|----|---------------------|
| Windows | `%APPDATA%\Claude\claude_desktop_config.json` |
| macOS | `~/Library/Application Support/Claude/claude_desktop_config.json` |
| Linux | `~/.config/Claude/claude_desktop_config.json` |
Or via VSCode: `Settings` → `MCP` → `Configuration File`
---
## Natural Language Prompts
You can ask the AI questions in natural language, and it will use Postgres MCP to query your database:
### Database Overview
- "What tables exist in the database?"
- "Show me the schema of the Product table"
- "What are the relationships between tables?"
- "Analyze the database health"
### Scraping Results
- "How many products are in the database?"
- "Show me products with the highest discounts"
- "Find products without categories"
- "What is the price distribution of products?"
- "Which stores have the most products?"
### Performance
- "Are there any slow queries?"
- "What indexes should I add to improve performance?"
- "Show me the database health report"
---
## SQL Query Examples
You can also ask the AI to execute specific SQL queries using the MCP tools.
### 1. Basic Scraping Validation
```sql
-- Total products count
SELECT COUNT(*) as total_products FROM "Product";
-- Products by store
SELECT s.name, COUNT(p.id) as product_count
FROM "Store" s
LEFT JOIN "Product" p ON s.id = p."storeId"
GROUP BY s.id, s.name;
-- Latest scraping session
SELECT * FROM "ScrapingSession"
ORDER BY "startedAt" DESC LIMIT 1;
-- All scraping sessions with status
SELECT
id,
"sourceType",
status,
"startedAt",
"finishedAt",
CASE
WHEN "finishedAt" IS NOT NULL
THEN EXTRACT(EPOCH FROM ("finishedAt" - "startedAt"))
ELSE NULL
END as duration_seconds
FROM "ScrapingSession"
ORDER BY "startedAt" DESC;
```
### 2. Category Analysis
```sql
-- Products without categories
SELECT COUNT(*) FROM "Product" WHERE "categoryId" IS NULL;
-- Categories by product count
SELECT c.name, COUNT(p.id) as product_count
FROM "Category" c
LEFT JOIN "Product" p ON p."categoryId" = c.id
GROUP BY c.id, c.name
ORDER BY product_count DESC NULLS LAST
LIMIT 20;
-- Category hierarchy with counts
SELECT
c1.name as category,
c2.name as parent_category,
COUNT(p.id) as product_count
FROM "Category" c1
LEFT JOIN "Category" c2 ON c1."parentId" = c2.id
LEFT JOIN "Product" p ON p."categoryId" = c1.id
GROUP BY c1.id, c1.name, c2.name
ORDER BY product_count DESC;
-- Top-level categories (no parent)
SELECT c.name, COUNT(p.id) as product_count
FROM "Category" c
LEFT JOIN "Product" p ON p."categoryId" = c.id
WHERE c."parentId" IS NULL
GROUP BY c.id, c.name
ORDER BY product_count DESC;
```
### 3. Price and Promotion Analysis
```sql
-- Products with active discounts
SELECT
name,
"currentPrice",
"oldPrice",
"discountPercent",
"promotionEndDate"
FROM "Product"
WHERE "oldPrice" IS NOT NULL
AND ("promotionEndDate" IS NULL OR "promotionEndDate" > NOW())
ORDER BY "discountPercent" DESC NULLS LAST
LIMIT 20;
-- Expired promotions
SELECT
name,
"currentPrice",
"oldPrice",
"discountPercent",
"promotionEndDate"
FROM "Product"
WHERE "oldPrice" IS NOT NULL
AND "promotionEndDate" IS NOT NULL
AND "promotionEndDate" < NOW()
ORDER BY "promotionEndDate" DESC
LIMIT 20;
-- Price distribution
SELECT
CASE
WHEN "currentPrice" < 100 THEN '0-100'
WHEN "currentPrice" < 500 THEN '100-500'
WHEN "currentPrice" < 1000 THEN '500-1000'
ELSE '1000+'
END as price_range,
COUNT(*) as count
FROM "Product"
GROUP BY price_range
ORDER BY MIN("currentPrice");
-- Most expensive products
SELECT name, "currentPrice", brand, unit
FROM "Product"
ORDER BY "currentPrice" DESC
LIMIT 20;
-- Cheapest products
SELECT name, "currentPrice", brand, unit
FROM "Product"
WHERE "currentPrice" > 0
ORDER BY "currentPrice" ASC
LIMIT 20;
```
### 4. Data Quality Checks
```sql
-- Products missing critical fields
SELECT
COUNT(*) FILTER (WHERE name IS NULL OR name = '') as missing_name,
COUNT(*) FILTER (WHERE "categoryId" IS NULL) as missing_category,
COUNT(*) FILTER (WHERE brand IS NULL OR brand = '') as missing_brand,
COUNT(*) FILTER (WHERE "imageUrl" IS NULL OR "imageUrl" = '') as missing_image,
COUNT(*) FILTER (WHERE url IS NULL OR url = '') as missing_url,
COUNT(*) as total_products
FROM "Product";
-- Duplicate products check (same externalId for different stores)
SELECT "externalId", COUNT(*) as count
FROM "Product"
GROUP BY "externalId"
HAVING COUNT(*) > 1;
-- Products with strange prices (0 or negative)
SELECT name, "currentPrice", "oldPrice"
FROM "Product"
WHERE "currentPrice" <= 0 OR ("oldPrice" IS NOT NULL AND "oldPrice" <= 0)
LIMIT 20;
-- Products with impossible discounts
SELECT name, "currentPrice", "oldPrice", "discountPercent"
FROM "Product"
WHERE "discountPercent" < 0 OR "discountPercent" > 100
LIMIT 20;
```
### 5. Rating Analysis
```sql
-- Top rated products
SELECT
name,
rating,
"scoresCount",
"commentsCount",
brand
FROM "Product"
WHERE rating IS NOT NULL
ORDER BY rating DESC, "scoresCount" DESC
LIMIT 20;
-- Most reviewed products
SELECT
name,
rating,
"scoresCount",
"commentsCount",
brand
FROM "Product"
WHERE "commentsCount" IS NOT NULL
ORDER BY "commentsCount" DESC
LIMIT 20;
-- Products without ratings
SELECT COUNT(*) FROM "Product" WHERE rating IS NULL;
```
### 6. Brand Analysis
```sql
-- Top brands by product count
SELECT brand, COUNT(*) as product_count
FROM "Product"
WHERE brand IS NOT NULL AND brand != ''
GROUP BY brand
ORDER BY product_count DESC
LIMIT 20;
-- Average price by brand (for brands with 10+ products)
SELECT
brand,
COUNT(*) as product_count,
AVG("currentPrice") as avg_price,
MIN("currentPrice") as min_price,
MAX("currentPrice") as max_price
FROM "Product"
WHERE brand IS NOT NULL AND brand != ''
GROUP BY brand
HAVING COUNT(*) >= 10
ORDER BY product_count DESC
LIMIT 20;
```
### 7. Health Check Queries
```sql
-- Table sizes
SELECT
schemaname,
tablename,
pg_size_pretty(pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass)) AS size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(format('%I.%I', schemaname, tablename)::regclass) DESC;
-- Index usage
SELECT
schemaname,
relname AS tablename,
indexrelname AS indexname,
idx_scan,
idx_tup_read,
idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
-- Table row counts
SELECT
'Store' as table_name,
COUNT(*) as row_count
FROM "Store"
UNION ALL
SELECT 'Category', COUNT(*) FROM "Category"
UNION ALL
SELECT 'Product', COUNT(*) FROM "Product"
UNION ALL
SELECT 'ScrapingSession', COUNT(*) FROM "ScrapingSession";
```
---
## MCP Tools Reference
Postgres MCP provides these tools that the AI can use:
| Tool | Description |
|------|-------------|
| `list_schemas` | Lists all database schemas |
| `list_objects` | Lists tables, views, sequences in a schema |
| `get_object_details` | Gets table/column details |
| `execute_sql` | Executes SQL queries |
| `explain_query` | Shows query execution plan |
| `get_top_queries` | Reports slowest queries |
| `analyze_workload_indexes` | Recommends indexes for workload |
| `analyze_db_health` | Performs comprehensive health checks |
---
## Example Workflow
Here's a typical workflow for testing scraping results:
1. **Start the database and the MCP server**:
```bash
docker-compose up -d postgres postgres-mcp
```
2. **Run the scraper**:
```bash
pnpm dev
```
3. **Ask the AI to verify**:
- "Check the database health"
- "How many products were scraped?"
- "Are there any products without categories?"
- "Show me the top 20 products by discount"
- "Find any data quality issues"
4. **Analyze performance**:
- "Are there any slow queries?"
- "Should I add any indexes?"

View File

@@ -47,6 +47,19 @@ services:
postgres: postgres:
condition: service_healthy condition: service_healthy
# Postgres MCP server exposed over SSE so AI tooling can query the scraper DB.
postgres-mcp:
  image: crystaldba/postgres-mcp:latest
  container_name: supermarket-postgres-mcp
  restart: unless-stopped
  environment:
    DATABASE_URI: postgresql://user:password@postgres:5432/supermarket
  ports:
    # Publish on loopback only: the server is started with
    # --access-mode=unrestricted, so it should not be reachable from other
    # hosts. The .mcp.json.example URL (http://localhost:8000/sse) still works.
    - "127.0.0.1:8000:8000"
  command: ["--access-mode=unrestricted", "--transport=sse"]
  depends_on:
    postgres:
      condition: service_healthy
volumes: volumes:
postgres_data: postgres_data:
pgadmin_data: pgadmin_data:

View File

@@ -0,0 +1,97 @@
import 'dotenv/config';
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
import { Logger } from '../utils/logger.js';
/**
 * Prints a category-coverage report for the products stored in the database.
 *
 * Logs the total product count, how many products have / lack a `categoryId`
 * (with percentages), the total number of categories, and up to five sample
 * rows each for categories, products without a category and products with one.
 *
 * On failure the error is logged and `process.exitCode` is set to 1. The
 * process is NOT terminated with `process.exit(1)` inside `catch`, because
 * that would end the process before `finally` runs and the database
 * connection would never be released.
 *
 * @returns {Promise<void>} Resolves once the report is printed and the
 *   database has been disconnected.
 */
async function main() {
  try {
    await connectDatabase();

    // Check total products and null categoryId count
    const totalProducts = await prisma.product.count();
    const nullCategoryCount = await prisma.product.count({
      where: { categoryId: null },
    });
    const withCategoryCount = await prisma.product.count({
      where: { categoryId: { not: null } },
    });

    // An empty Product table would otherwise produce 0 / 0 and print "NaN%".
    const percentOfTotal = (count) =>
      (totalProducts > 0 ? (count / totalProducts) * 100 : 0).toFixed(2);

    Logger.info('\n📊 СТАТИСТИКА ПО КАТЕГОРИЯМ:');
    Logger.info(`Всего товаров: ${totalProducts}`);
    Logger.info(`Товаров без категории (null): ${nullCategoryCount} (${percentOfTotal(nullCategoryCount)}%)`);
    Logger.info(`Товаров с категорией: ${withCategoryCount} (${percentOfTotal(withCategoryCount)}%)`);

    // Check total categories
    const totalCategories = await prisma.category.count();
    Logger.info(`\nВсего категорий в БД: ${totalCategories}`);

    // Sample categories
    if (totalCategories > 0) {
      const sampleCategories = await prisma.category.findMany({
        take: 5,
        select: {
          id: true,
          externalId: true,
          name: true,
          _count: {
            select: { products: true },
          },
        },
      });
      Logger.info('\n📁 Примеры категорий:');
      for (const cat of sampleCategories) {
        Logger.info(` - [${cat.externalId}] ${cat.name} (товаров: ${cat._count.products})`);
      }
    }

    // Sample products without categories
    const productsWithoutCategory = await prisma.product.findMany({
      where: { categoryId: null },
      take: 5,
      select: {
        id: true,
        externalId: true,
        name: true,
        currentPrice: true,
      },
    });
    Logger.info('\n❌ Примеры товаров БЕЗ категории:');
    for (const p of productsWithoutCategory) {
      Logger.info(` - [${p.externalId}] ${p.name} (₽${p.currentPrice})`);
    }

    // Sample products with categories
    const productsWithCategory = await prisma.product.findMany({
      where: { categoryId: { not: null } },
      take: 5,
      select: {
        id: true,
        externalId: true,
        name: true,
        currentPrice: true,
        category: {
          select: {
            externalId: true,
            name: true,
          },
        },
      },
    });
    if (productsWithCategory.length > 0) {
      Logger.info('\n✅ Примеры товаров С категорией:');
      for (const p of productsWithCategory) {
        Logger.info(` - [${p.externalId}] ${p.name} → [${p.category?.externalId}] ${p.category?.name}`);
      }
    }
  } catch (error) {
    Logger.error('❌ Ошибка при анализе:', error);
    // Signal failure without exiting here, so the `finally` cleanup still runs.
    process.exitCode = 1;
  } finally {
    await disconnectDatabase();
  }
}
// Script entry point.
main();

View File

@@ -0,0 +1,38 @@
import 'dotenv/config';
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
import { Logger } from '../utils/logger.js';
/**
 * Fetches one product from the database and prints its selected fields as
 * pretty-printed JSON, so the stored shape can be inspected by eye.
 *
 * If the table holds no products a short notice is logged instead of exiting
 * silently. On failure the error is logged and `process.exitCode` is set to 1;
 * `process.exit(1)` is deliberately avoided inside `catch` because it would
 * terminate the process before `finally` disconnects the database.
 *
 * @returns {Promise<void>} Resolves after the database has been disconnected.
 */
async function main() {
  try {
    await connectDatabase();

    // Get a sample product with all fields
    const product = await prisma.product.findFirst({
      select: {
        id: true,
        externalId: true,
        name: true,
        description: true,
        currentPrice: true,
        unit: true,
        weight: true,
        brand: true,
        categoryId: true,
        badges: true,
      },
    });

    if (product) {
      Logger.info('=== ДЕТАЛИ ТОВАРА ИЗ БД ===');
      Logger.info(JSON.stringify(product, null, 2));
    } else {
      // Make the "nothing to show" case visible instead of printing nothing.
      Logger.info('В БД нет товаров для отображения');
    }
  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Signal failure without exiting here, so the `finally` cleanup still runs.
    process.exitCode = 1;
  } finally {
    await disconnectDatabase();
  }
}
// Script entry point.
main();

View File

@@ -0,0 +1,67 @@
import 'dotenv/config';
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
import { Logger } from '../utils/logger.js';
/**
 * Requests the first five goods through `MagnitApiScraper` and logs the raw
 * structure of the response for manual inspection: the first item as JSON,
 * per-item category presence, the response-level `category`, and up to ten
 * entries of `fastCategoriesExtended`.
 *
 * Scraper options come from the `MAGNIT_STORE_CODE`, `MAGNIT_STORE_TYPE`,
 * `MAGNIT_CATALOG_TYPE` and `MAGNIT_HEADLESS` environment variables, with the
 * fallbacks visible below.
 *
 * On failure the error is logged and `process.exitCode` is set to 1;
 * `process.exit(1)` is deliberately avoided inside `catch` because it would
 * terminate the process before `finally` gets to call `scraper.close()`.
 *
 * @returns {Promise<void>} Resolves after the scraper has been closed.
 */
async function main() {
  const MAX_NAME_LENGTH = 50;
  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';
  const scraper = new MagnitApiScraper({
    storeCode,
    storeType: process.env.MAGNIT_STORE_TYPE || '6',
    catalogType: process.env.MAGNIT_CATALOG_TYPE || '1',
    headless: process.env.MAGNIT_HEADLESS !== 'false',
  });

  try {
    await scraper.initialize();
    Logger.info('Запрос первых 5 товаров для инспекции...\n');
    const response = await scraper.searchGoods({ limit: 5, offset: 0 }, []);
    Logger.info(`Получено товаров: ${response.items.length}\n`);

    if (response.items.length > 0) {
      Logger.info('=== СТРУКТУРА ПЕРВОГО ТОВАРА ===');
      const firstProduct = response.items[0];
      Logger.info(JSON.stringify(firstProduct, null, 2));

      Logger.info('\n=== ПРОВЕРКА НАЛИЧИЯ КАТЕГОРИЙ ===');
      response.items.forEach((item, index) => {
        // An item without a name must not abort the inspection, and the
        // ellipsis is only shown when the name was actually truncated.
        const name = String(item.name ?? '');
        const shortName = name.length > MAX_NAME_LENGTH
          ? `${name.substring(0, MAX_NAME_LENGTH)}...`
          : name;
        Logger.info(`${index + 1}. [${item.id}] ${shortName}`);
        if (item.category) {
          Logger.info(` ✅ Категория: [${item.category.id}] ${item.category.title}`);
        } else {
          Logger.info(` ❌ Категория отсутствует (undefined)`);
        }
      });

      Logger.info('\n=== ОТВЕТ API (response.category) ===');
      if (response.category) {
        Logger.info(`Категория уровня ответа: [${response.category.id}] ${response.category.title}`);
      } else {
        Logger.info('Категория уровня ответа отсутствует');
      }

      Logger.info('\n=== БЫСТРЫЕ КАТЕГОРИИ (fastCategoriesExtended) ===');
      if (response.fastCategoriesExtended && response.fastCategoriesExtended.length > 0) {
        Logger.info(`Найдено ${response.fastCategoriesExtended.length} быстрых категорий:`);
        response.fastCategoriesExtended.slice(0, 10).forEach((cat) => {
          Logger.info(` - [${cat.id}] ${cat.title}`);
        });
      } else {
        Logger.info('Быстрые категории отсутствуют');
      }
    }
  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Signal failure without exiting here, so the `finally` cleanup still runs.
    process.exitCode = 1;
  } finally {
    await scraper.close();
  }
}
// Script entry point.
main();