feat: add Postgres MCP integration for database testing
- Add postgres-mcp service to docker-compose.yml (SSE mode on port 8000) - Add .mcp.json.example with SSE configuration template - Add .gitignore entries for .claude/settings.local.json and .mcp.json - Add MCP_EXAMPLES.md with query examples for testing scraping results - Add analysis scripts: analyze-category-nulls.ts, check-product-details.ts, inspect-api-response.ts Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
@@ -1,7 +0,0 @@
|
||||
{
|
||||
"permissions": {
|
||||
"allow": [
|
||||
"Bash(cat:*)"
|
||||
]
|
||||
}
|
||||
}
|
||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -35,3 +35,6 @@ test-results/
|
||||
playwright-report/
|
||||
playwright/.cache/
|
||||
|
||||
# Claude Code
|
||||
.claude/settings.local.json
|
||||
.mcp.json
|
||||
|
||||
8
.mcp.json.example
Normal file
8
.mcp.json.example
Normal file
@@ -0,0 +1,8 @@
|
||||
{
|
||||
"mcpServers": {
|
||||
"postgres-supermarket": {
|
||||
"type": "sse",
|
||||
"url": "http://localhost:8000/sse"
|
||||
}
|
||||
}
|
||||
}
|
||||
354
MCP_EXAMPLES.md
Normal file
354
MCP_EXAMPLES.md
Normal file
@@ -0,0 +1,354 @@
|
||||
# MCP Examples for Supermarket Scraper
|
||||
|
||||
This document contains example queries and prompts you can use with the Postgres MCP server to test and analyze your scraping results.
|
||||
|
||||
## Setup
|
||||
|
||||
### 1. Install Docker (if not already installed)
|
||||
- Windows: [Docker Desktop](https://www.docker.com/products/docker-desktop/)
|
||||
- macOS: [Docker Desktop for Mac](https://www.docker.com/products/docker-desktop/)
|
||||
- Linux (Debian/Ubuntu): `sudo apt-get install docker.io docker-compose`
|
||||
|
||||
### 2. Pull the Postgres MCP image
|
||||
```bash
|
||||
docker pull crystaldba/postgres-mcp
|
||||
```
|
||||
|
||||
### 3. Start your database
|
||||
```bash
|
||||
docker-compose up -d postgres postgres-mcp
|
||||
```
|
||||
|
||||
### 4. Configure Claude Code with MCP
|
||||
|
||||
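Copy `.mcp.json.example` to `.mcp.json` in the project root (the file is git-ignored, so local changes stay out of the repository). Alternatively, add the same configuration to your Claude config: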
|
||||
|
||||
| OS | Config File Location |
|
||||
|----|---------------------|
|
||||
| Windows | `%APPDATA%\Claude\claude_desktop_config.json` |
|
||||
| macOS | `~/Library/Application Support/Claude/claude_desktop_config.json` |
|
||||
| Linux | `~/.config/Claude/claude_desktop_config.json` |
|
||||
|
||||
Or via VSCode: `Settings` → `MCP` → `Configuration File`
|
||||
|
||||
---
|
||||
|
||||
## Natural Language Prompts
|
||||
|
||||
You can ask the AI questions in natural language, and it will use Postgres MCP to query your database:
|
||||
|
||||
### Database Overview
|
||||
- "What tables exist in the database?"
|
||||
- "Show me the schema of the Product table"
|
||||
- "What are the relationships between tables?"
|
||||
- "Analyze the database health"
|
||||
|
||||
### Scraping Results
|
||||
- "How many products are in the database?"
|
||||
- "Show me products with the highest discounts"
|
||||
- "Find products without categories"
|
||||
- "What is the price distribution of products?"
|
||||
- "Which stores have the most products?"
|
||||
|
||||
### Performance
|
||||
- "Are there any slow queries?"
|
||||
- "What indexes should I add to improve performance?"
|
||||
- "Show me the database health report"
|
||||
|
||||
---
|
||||
|
||||
## SQL Query Examples
|
||||
|
||||
You can also ask the AI to execute specific SQL queries using the MCP tools.
|
||||
|
||||
### 1. Basic Scraping Validation
|
||||
|
||||
```sql
|
||||
-- Total products count
|
||||
SELECT COUNT(*) as total_products FROM "Product";
|
||||
|
||||
-- Products by store
|
||||
SELECT s.name, COUNT(p.id) as product_count
|
||||
FROM "Store" s
|
||||
LEFT JOIN "Product" p ON s.id = p."storeId"
|
||||
GROUP BY s.id, s.name;
|
||||
|
||||
-- Latest scraping session
|
||||
SELECT * FROM "ScrapingSession"
|
||||
ORDER BY "startedAt" DESC LIMIT 1;
|
||||
|
||||
-- All scraping sessions with status
|
||||
SELECT
|
||||
id,
|
||||
"sourceType",
|
||||
status,
|
||||
"startedAt",
|
||||
"finishedAt",
|
||||
CASE
|
||||
WHEN "finishedAt" IS NOT NULL
|
||||
THEN EXTRACT(EPOCH FROM ("finishedAt" - "startedAt"))
|
||||
ELSE NULL
|
||||
END as duration_seconds
|
||||
FROM "ScrapingSession"
|
||||
ORDER BY "startedAt" DESC;
|
||||
```
|
||||
|
||||
### 2. Category Analysis
|
||||
|
||||
```sql
|
||||
-- Products without categories
|
||||
SELECT COUNT(*) FROM "Product" WHERE "categoryId" IS NULL;
|
||||
|
||||
-- Categories by product count
|
||||
SELECT c.name, COUNT(p.id) as product_count
|
||||
FROM "Category" c
|
||||
LEFT JOIN "Product" p ON p."categoryId" = c.id
|
||||
GROUP BY c.id, c.name
|
||||
ORDER BY product_count DESC NULLS LAST
|
||||
LIMIT 20;
|
||||
|
||||
-- Category hierarchy with counts
|
||||
SELECT
|
||||
c1.name as category,
|
||||
c2.name as parent_category,
|
||||
COUNT(p.id) as product_count
|
||||
FROM "Category" c1
|
||||
LEFT JOIN "Category" c2 ON c1."parentId" = c2.id
|
||||
LEFT JOIN "Product" p ON p."categoryId" = c1.id
|
||||
GROUP BY c1.id, c1.name, c2.name
|
||||
ORDER BY product_count DESC;
|
||||
|
||||
-- Top-level categories (no parent)
|
||||
SELECT c.name, COUNT(p.id) as product_count
|
||||
FROM "Category" c
|
||||
LEFT JOIN "Product" p ON p."categoryId" = c.id
|
||||
WHERE c."parentId" IS NULL
|
||||
GROUP BY c.id, c.name
|
||||
ORDER BY product_count DESC;
|
||||
```
|
||||
|
||||
### 3. Price and Promotion Analysis
|
||||
|
||||
```sql
|
||||
-- Products with active discounts
-- A promotion counts as active when it has no end date or the end date is in
-- the future. NULLS LAST is needed because Postgres sorts NULLs first under
-- DESC, which would push rows without a discount percentage to the top.
SELECT
    name,
    "currentPrice",
    "oldPrice",
    "discountPercent",
    "promotionEndDate"
FROM "Product"
WHERE "oldPrice" IS NOT NULL
  AND ("promotionEndDate" IS NULL OR "promotionEndDate" > NOW())
ORDER BY "discountPercent" DESC NULLS LAST
LIMIT 20;
|
||||
|
||||
-- Expired promotions
|
||||
SELECT
|
||||
name,
|
||||
"currentPrice",
|
||||
"oldPrice",
|
||||
"discountPercent",
|
||||
"promotionEndDate"
|
||||
FROM "Product"
|
||||
WHERE "oldPrice" IS NOT NULL
|
||||
AND "promotionEndDate" IS NOT NULL
|
||||
AND "promotionEndDate" < NOW()
|
||||
ORDER BY "promotionEndDate" DESC
|
||||
LIMIT 20;
|
||||
|
||||
-- Price distribution
-- Buckets are ordered by the lowest price they contain, not by label: the
-- labels are text, so sorting them would place '1000+' before '500-1000'.
-- Rows with a NULL price get their own bucket instead of falling through to
-- the ELSE branch; its MIN() is NULL, so it sorts last.
SELECT
    CASE
        WHEN "currentPrice" IS NULL THEN 'unknown'
        WHEN "currentPrice" < 100 THEN '0-100'
        WHEN "currentPrice" < 500 THEN '100-500'
        WHEN "currentPrice" < 1000 THEN '500-1000'
        ELSE '1000+'
    END AS price_range,
    COUNT(*) AS count
FROM "Product"
GROUP BY price_range
ORDER BY MIN("currentPrice");
|
||||
|
||||
-- Most expensive products
|
||||
SELECT name, "currentPrice", brand, unit
|
||||
FROM "Product"
|
||||
ORDER BY "currentPrice" DESC
|
||||
LIMIT 20;
|
||||
|
||||
-- Cheapest products
|
||||
SELECT name, "currentPrice", brand, unit
|
||||
FROM "Product"
|
||||
WHERE "currentPrice" > 0
|
||||
ORDER BY "currentPrice" ASC
|
||||
LIMIT 20;
|
||||
```
|
||||
|
||||
### 4. Data Quality Checks
|
||||
|
||||
```sql
|
||||
-- Products missing critical fields
|
||||
SELECT
|
||||
COUNT(*) FILTER (WHERE name IS NULL OR name = '') as missing_name,
|
||||
COUNT(*) FILTER (WHERE "categoryId" IS NULL) as missing_category,
|
||||
COUNT(*) FILTER (WHERE brand IS NULL OR brand = '') as missing_brand,
|
||||
COUNT(*) FILTER (WHERE "imageUrl" IS NULL OR "imageUrl" = '') as missing_image,
|
||||
COUNT(*) FILTER (WHERE url IS NULL OR url = '') as missing_url,
|
||||
COUNT(*) as total_products
|
||||
FROM "Product";
|
||||
|
||||
-- Duplicate products check (same externalId for different stores)
|
||||
SELECT "externalId", COUNT(*) as count
|
||||
FROM "Product"
|
||||
GROUP BY "externalId"
|
||||
HAVING COUNT(*) > 1;
|
||||
|
||||
-- Products with strange prices (0 or negative)
|
||||
SELECT name, "currentPrice", "oldPrice"
|
||||
FROM "Product"
|
||||
WHERE "currentPrice" <= 0 OR ("oldPrice" IS NOT NULL AND "oldPrice" <= 0)
|
||||
LIMIT 20;
|
||||
|
||||
-- Products with impossible discounts
|
||||
SELECT name, "currentPrice", "oldPrice", "discountPercent"
|
||||
FROM "Product"
|
||||
WHERE "discountPercent" < 0 OR "discountPercent" > 100
|
||||
LIMIT 20;
|
||||
```
|
||||
|
||||
### 5. Rating Analysis
|
||||
|
||||
```sql
|
||||
-- Top rated products
|
||||
SELECT
|
||||
name,
|
||||
rating,
|
||||
"scoresCount",
|
||||
"commentsCount",
|
||||
brand
|
||||
FROM "Product"
|
||||
WHERE rating IS NOT NULL
|
||||
ORDER BY rating DESC, "scoresCount" DESC
|
||||
LIMIT 20;
|
||||
|
||||
-- Most reviewed products
|
||||
SELECT
|
||||
name,
|
||||
rating,
|
||||
"scoresCount",
|
||||
"commentsCount",
|
||||
brand
|
||||
FROM "Product"
|
||||
WHERE "commentsCount" IS NOT NULL
|
||||
ORDER BY "commentsCount" DESC
|
||||
LIMIT 20;
|
||||
|
||||
-- Products without ratings
|
||||
SELECT COUNT(*) FROM "Product" WHERE rating IS NULL;
|
||||
```
|
||||
|
||||
### 6. Brand Analysis
|
||||
|
||||
```sql
|
||||
-- Top brands by product count
|
||||
SELECT brand, COUNT(*) as product_count
|
||||
FROM "Product"
|
||||
WHERE brand IS NOT NULL AND brand != ''
|
||||
GROUP BY brand
|
||||
ORDER BY product_count DESC
|
||||
LIMIT 20;
|
||||
|
||||
-- Average price by brand (for brands with 10+ products)
|
||||
SELECT
|
||||
brand,
|
||||
COUNT(*) as product_count,
|
||||
AVG("currentPrice") as avg_price,
|
||||
MIN("currentPrice") as min_price,
|
||||
MAX("currentPrice") as max_price
|
||||
FROM "Product"
|
||||
WHERE brand IS NOT NULL AND brand != ''
|
||||
GROUP BY brand
|
||||
HAVING COUNT(*) >= 10
|
||||
ORDER BY product_count DESC
|
||||
LIMIT 20;
|
||||
```
|
||||
|
||||
### 7. Health Check Queries
|
||||
|
||||
```sql
|
||||
-- Table sizes
-- quote_ident() is required here: when an unquoted text name is converted to
-- regclass it is folded to lower case, so mixed-case tables such as "Product"
-- would fail with "relation does not exist".
SELECT
    schemaname,
    tablename,
    pg_size_pretty(
        pg_total_relation_size(
            (quote_ident(schemaname) || '.' || quote_ident(tablename))::regclass
        )
    ) AS size
FROM pg_tables
WHERE schemaname = 'public'
ORDER BY pg_total_relation_size(
    (quote_ident(schemaname) || '.' || quote_ident(tablename))::regclass
) DESC;
|
||||
|
||||
-- Index usage
-- pg_stat_user_indexes exposes the table and index names as relname and
-- indexrelname; they are aliased to keep the familiar column headers.
SELECT
    schemaname,
    relname AS tablename,
    indexrelname AS indexname,
    idx_scan,
    idx_tup_read,
    idx_tup_fetch
FROM pg_stat_user_indexes
ORDER BY idx_scan DESC;
|
||||
|
||||
-- Table row counts
|
||||
SELECT
|
||||
'Store' as table_name,
|
||||
COUNT(*) as row_count
|
||||
FROM "Store"
|
||||
UNION ALL
|
||||
SELECT 'Category', COUNT(*) FROM "Category"
|
||||
UNION ALL
|
||||
SELECT 'Product', COUNT(*) FROM "Product"
|
||||
UNION ALL
|
||||
SELECT 'ScrapingSession', COUNT(*) FROM "ScrapingSession";
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## MCP Tools Reference
|
||||
|
||||
Postgres MCP provides these tools that the AI can use:
|
||||
|
||||
| Tool | Description |
|
||||
|------|-------------|
|
||||
| `list_schemas` | Lists all database schemas |
|
||||
| `list_objects` | Lists tables, views, sequences in a schema |
|
||||
| `get_object_details` | Gets table/column details |
|
||||
| `execute_sql` | Executes SQL queries |
|
||||
| `explain_query` | Shows query execution plan |
|
||||
| `get_top_queries` | Reports slowest queries |
|
||||
| `analyze_workload_indexes` | Recommends indexes for workload |
|
||||
| `analyze_db_health` | Performs comprehensive health checks |
|
||||
|
||||
---
|
||||
|
||||
## Example Workflow
|
||||
|
||||
Here's a typical workflow for testing scraping results:
|
||||
|
||||
1. **Start the database**:
|
||||
```bash
|
||||
docker-compose up -d postgres postgres-mcp
|
||||
```
|
||||
|
||||
2. **Run the scraper**:
|
||||
```bash
|
||||
pnpm dev
|
||||
```
|
||||
|
||||
3. **Ask the AI to verify**:
|
||||
- "Check the database health"
|
||||
- "How many products were scraped?"
|
||||
- "Are there any products without categories?"
|
||||
- "Show me the top 20 products by discount"
|
||||
- "Find any data quality issues"
|
||||
|
||||
4. **Analyze performance**:
|
||||
- "Are there any slow queries?"
|
||||
- "Should I add any indexes?"
|
||||
@@ -47,6 +47,19 @@ services:
|
||||
postgres:
|
||||
condition: service_healthy
|
||||
|
||||
# Postgres MCP server (SSE transport) used by AI tooling to query the database.
postgres-mcp:
  image: crystaldba/postgres-mcp:latest
  container_name: supermarket-postgres-mcp
  restart: unless-stopped
  environment:
    DATABASE_URI: postgresql://user:password@postgres:5432/supermarket
  ports:
    # Published on loopback only: the server runs with unrestricted access
    # mode, so it must not be reachable from other machines on the network.
    # .mcp.json.example connects via http://localhost:8000/sse, which still works.
    - "127.0.0.1:8000:8000"
  command: ["--access-mode=unrestricted", "--transport=sse"]
  depends_on:
    postgres:
      condition: service_healthy
|
||||
|
||||
volumes:
|
||||
postgres_data:
|
||||
pgadmin_data:
|
||||
|
||||
97
src/scripts/analyze-category-nulls.ts
Normal file
97
src/scripts/analyze-category-nulls.ts
Normal file
@@ -0,0 +1,97 @@
|
||||
import 'dotenv/config';
|
||||
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
|
||||
import { Logger } from '../utils/logger.js';
|
||||
|
||||
/**
 * Logs category-coverage statistics for the products stored in the database:
 * total / uncategorised / categorised counts with percentages, the number of
 * categories, and up to five sample rows of categories, products without a
 * category, and products with a category.
 *
 * On failure the error is logged and the process exit code is set to 1.
 */
async function main() {
  try {
    await connectDatabase();

    // Check total products and null categoryId count
    const totalProducts = await prisma.product.count();
    const nullCategoryCount = await prisma.product.count({
      where: { categoryId: null }
    });
    const withCategoryCount = await prisma.product.count({
      where: { categoryId: { not: null } }
    });

    // Share of all products as a two-decimal percentage. Guarded so that an
    // empty Product table prints "0.00" instead of "NaN".
    const percentOfTotal = (count: number): string =>
      totalProducts > 0 ? ((count / totalProducts) * 100).toFixed(2) : '0.00';

    Logger.info('\n📊 СТАТИСТИКА ПО КАТЕГОРИЯМ:');
    Logger.info(`Всего товаров: ${totalProducts}`);
    Logger.info(`Товаров без категории (null): ${nullCategoryCount} (${percentOfTotal(nullCategoryCount)}%)`);
    Logger.info(`Товаров с категорией: ${withCategoryCount} (${percentOfTotal(withCategoryCount)}%)`);

    // Check total categories
    const totalCategories = await prisma.category.count();
    Logger.info(`\nВсего категорий в БД: ${totalCategories}`);

    // Sample categories
    if (totalCategories > 0) {
      const sampleCategories = await prisma.category.findMany({
        take: 5,
        select: {
          id: true,
          externalId: true,
          name: true,
          _count: {
            select: { products: true }
          }
        }
      });

      Logger.info('\n📁 Примеры категорий:');
      sampleCategories.forEach(cat => {
        Logger.info(`  - [${cat.externalId}] ${cat.name} (товаров: ${cat._count.products})`);
      });
    }

    // Sample products without categories
    const productsWithoutCategory = await prisma.product.findMany({
      where: { categoryId: null },
      take: 5,
      select: {
        id: true,
        externalId: true,
        name: true,
        currentPrice: true
      }
    });

    Logger.info('\n❌ Примеры товаров БЕЗ категории:');
    productsWithoutCategory.forEach(p => {
      Logger.info(`  - [${p.externalId}] ${p.name} (₽${p.currentPrice})`);
    });

    // Sample products with categories
    const productsWithCategory = await prisma.product.findMany({
      where: { categoryId: { not: null } },
      take: 5,
      select: {
        id: true,
        externalId: true,
        name: true,
        currentPrice: true,
        category: {
          select: {
            externalId: true,
            name: true
          }
        }
      }
    });

    if (productsWithCategory.length > 0) {
      Logger.info('\n✅ Примеры товаров С категорией:');
      productsWithCategory.forEach(p => {
        Logger.info(`  - [${p.externalId}] ${p.name} → [${p.category?.externalId}] ${p.category?.name}`);
      });
    }

  } catch (error) {
    Logger.error('❌ Ошибка при анализе:', error);
    // Set the exit code instead of calling process.exit(1): process.exit()
    // terminates immediately, so the `finally` block below would never run
    // and the database connection would not be closed.
    process.exitCode = 1;
  } finally {
    await disconnectDatabase();
  }
}
|
||||
|
||||
main();
|
||||
38
src/scripts/check-product-details.ts
Normal file
38
src/scripts/check-product-details.ts
Normal file
@@ -0,0 +1,38 @@
|
||||
import 'dotenv/config';
|
||||
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
|
||||
import { Logger } from '../utils/logger.js';
|
||||
|
||||
/**
 * Fetches one product from the database with a fixed set of fields and logs
 * it as pretty-printed JSON, as a quick check of what the scraper stored.
 *
 * On failure the error is logged and the process exit code is set to 1.
 */
async function main() {
  try {
    await connectDatabase();

    // Get a sample product with all fields
    const product = await prisma.product.findFirst({
      select: {
        id: true,
        externalId: true,
        name: true,
        description: true,
        currentPrice: true,
        unit: true,
        weight: true,
        brand: true,
        categoryId: true,
        badges: true,
      }
    });

    if (product) {
      Logger.info('=== ДЕТАЛИ ТОВАРА ИЗ БД ===');
      Logger.info(JSON.stringify(product, null, 2));
    } else {
      // Make an empty table visible instead of exiting with no output at all.
      Logger.info('Товары в БД не найдены');
    }

  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Set the exit code instead of calling process.exit(1): process.exit()
    // terminates immediately, so the `finally` block below would never run
    // and the database connection would not be closed.
    process.exitCode = 1;
  } finally {
    await disconnectDatabase();
  }
}
|
||||
|
||||
main();
|
||||
67
src/scripts/inspect-api-response.ts
Normal file
67
src/scripts/inspect-api-response.ts
Normal file
@@ -0,0 +1,67 @@
|
||||
import 'dotenv/config';
|
||||
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
|
||||
import { Logger } from '../utils/logger.js';
|
||||
|
||||
/**
 * Requests the first five goods through MagnitApiScraper and logs the raw
 * structure of the first item, whether each item carries a category, the
 * response-level category, and up to ten entries of fastCategoriesExtended.
 *
 * Scraper settings are read from MAGNIT_* environment variables, with the
 * defaults shown below. On failure the error is logged and the process exit
 * code is set to 1.
 */
async function main() {
  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';

  const scraper = new MagnitApiScraper({
    storeCode,
    storeType: process.env.MAGNIT_STORE_TYPE || '6',
    catalogType: process.env.MAGNIT_CATALOG_TYPE || '1',
    // Headless unless MAGNIT_HEADLESS is explicitly set to 'false'.
    headless: process.env.MAGNIT_HEADLESS !== 'false',
  });

  try {
    await scraper.initialize();

    Logger.info('Запрос первых 5 товаров для инспекции...\n');

    const response = await scraper.searchGoods({ limit: 5, offset: 0 }, []);

    Logger.info(`Получено товаров: ${response.items.length}\n`);

    if (response.items.length > 0) {
      Logger.info('=== СТРУКТУРА ПЕРВОГО ТОВАРА ===');
      const firstProduct = response.items[0];
      Logger.info(JSON.stringify(firstProduct, null, 2));

      Logger.info('\n=== ПРОВЕРКА НАЛИЧИЯ КАТЕГОРИЙ ===');
      response.items.forEach((item, index) => {
        Logger.info(
          `${index + 1}. [${item.id}] ${item.name.substring(0, 50)}...`
        );
        if (item.category) {
          Logger.info(`   ✅ Категория: [${item.category.id}] ${item.category.title}`);
        } else {
          Logger.info(`   ❌ Категория отсутствует (undefined)`);
        }
      });

      Logger.info('\n=== ОТВЕТ API (response.category) ===');
      if (response.category) {
        Logger.info(`Категория уровня ответа: [${response.category.id}] ${response.category.title}`);
      } else {
        Logger.info('Категория уровня ответа отсутствует');
      }

      Logger.info('\n=== БЫСТРЫЕ КАТЕГОРИИ (fastCategoriesExtended) ===');
      if (response.fastCategoriesExtended && response.fastCategoriesExtended.length > 0) {
        Logger.info(`Найдено ${response.fastCategoriesExtended.length} быстрых категорий:`);
        response.fastCategoriesExtended.slice(0, 10).forEach(cat => {
          Logger.info(`  - [${cat.id}] ${cat.title}`);
        });
      } else {
        Logger.info('Быстрые категории отсутствуют');
      }
    }

  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Set the exit code instead of calling process.exit(1): process.exit()
    // terminates immediately, so the `finally` block below would never run
    // and scraper.close() would be skipped.
    process.exitCode = 1;
  } finally {
    await scraper.close();
  }
}
|
||||
|
||||
main();
|
||||
Reference in New Issue
Block a user