Compare commits
6 Commits
9164527f58
...
912946bb00
| Author | SHA1 | Date | |
|---|---|---|---|
| 912946bb00 | |||
| b8f170d83b | |||
| dd4c64c601 | |||
| 3299cca574 | |||
| 5a763a4e13 | |||
| 6ba22469c7 |
@@ -1,7 +0,0 @@
|
|||||||
{
|
|
||||||
"permissions": {
|
|
||||||
"allow": [
|
|
||||||
"Bash(cat:*)"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
}
|
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -35,3 +35,6 @@ test-results/
|
|||||||
playwright-report/
|
playwright-report/
|
||||||
playwright/.cache/
|
playwright/.cache/
|
||||||
|
|
||||||
|
# Claude Code
|
||||||
|
.claude/settings.local.json
|
||||||
|
.mcp.json
|
||||||
|
|||||||
8
.mcp.json.example
Normal file
8
.mcp.json.example
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
{
|
||||||
|
"mcpServers": {
|
||||||
|
"postgres-supermarket": {
|
||||||
|
"type": "sse",
|
||||||
|
"url": "http://localhost:8000/sse"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
151
AGENTS.md
Normal file
151
AGENTS.md
Normal file
@@ -0,0 +1,151 @@
|
|||||||
|
# AGENTS.md
|
||||||
|
|
||||||
|
Guidelines for AI coding agents working on this repository.
|
||||||
|
|
||||||
|
## Project Overview
|
||||||
|
|
||||||
|
TypeScript-based scraper for Russian supermarkets (Magnit). Uses Playwright for sessions, Axios for API, PostgreSQL with Prisma ORM.
|
||||||
|
|
||||||
|
## Build & Run Commands
|
||||||
|
|
||||||
|
**Package Manager**: Use `pnpm` (not npm/yarn)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm install # Install dependencies
|
||||||
|
pnpm exec playwright install chromium # Install browsers (once)
|
||||||
|
pnpm type-check # Type checking (validation)
|
||||||
|
pnpm build # Build TypeScript to dist/
|
||||||
|
pnpm dev # Run main scraper
|
||||||
|
pnpm enrich # Run product enrichment
|
||||||
|
pnpm test-db # Test database connection
|
||||||
|
```
|
||||||
|
|
||||||
|
### Prisma Commands
|
||||||
|
|
||||||
|
```bash
|
||||||
|
pnpm prisma:generate # Generate client after schema changes
|
||||||
|
pnpm prisma:migrate # Create and apply migrations
|
||||||
|
pnpm prisma:studio # Open database GUI
|
||||||
|
```
|
||||||
|
|
||||||
|
### Running Scripts Directly
|
||||||
|
|
||||||
|
```bash
|
||||||
|
tsx src/scripts/scrape-magnit-products.ts
|
||||||
|
MAGNIT_STORE_CODE=992301 tsx src/scripts/scrape-magnit-products.ts
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing
|
||||||
|
|
||||||
|
No test framework configured. Manual testing via `pnpm test-db`, `pnpm dev`, Prisma Studio.
|
||||||
|
|
||||||
|
## Code Style
|
||||||
|
|
||||||
|
### Imports
|
||||||
|
|
||||||
|
1. External packages first, then internal modules
|
||||||
|
2. **Always include `.js` extension** for local imports (ESM)
|
||||||
|
3. Use named imports from Prisma client
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
import { chromium, Browser } from 'playwright';
|
||||||
|
import axios from 'axios';
|
||||||
|
import { Logger } from '../../../utils/logger.js';
|
||||||
|
import { PrismaClient } from '../../../../generated/prisma/client.js';
|
||||||
|
```
|
||||||
|
|
||||||
|
### Naming Conventions
|
||||||
|
|
||||||
|
| Type | Convention | Example |
|
||||||
|
|------|------------|---------|
|
||||||
|
| Classes/Interfaces | PascalCase | `MagnitApiScraper`, `CreateProductData` |
|
||||||
|
| Functions/variables | camelCase | `scrapeAllProducts`, `deviceId` |
|
||||||
|
| Constants | UPPER_SNAKE_CASE | `ACTUAL_API_PAGE_SIZE` |
|
||||||
|
| Class files | PascalCase | `MagnitApiScraper.ts` |
|
||||||
|
| Util files | camelCase | `logger.ts`, `errors.ts` |
|
||||||
|
|
||||||
|
### TypeScript Patterns
|
||||||
|
|
||||||
|
- **Strict mode** - all types explicit
|
||||||
|
- Interfaces for data, optional props with `?`, `readonly` for constants
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
export interface MagnitScraperConfig {
|
||||||
|
storeCode: string;
|
||||||
|
headless?: boolean;
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Error Handling
|
||||||
|
|
||||||
|
Use custom error classes from `src/utils/errors.ts`:
|
||||||
|
- `ScraperError` - scraping failures
|
||||||
|
- `DatabaseError` - database operations
|
||||||
|
- `APIError` - HTTP/API failures (includes statusCode)
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
try {
|
||||||
|
// operation
|
||||||
|
} catch (error) {
|
||||||
|
Logger.error('Ошибка операции:', error);
|
||||||
|
throw new APIError(
|
||||||
|
`Не удалось: ${error instanceof Error ? error.message : String(error)}`,
|
||||||
|
statusCode
|
||||||
|
);
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Logging
|
||||||
|
|
||||||
|
Use static `Logger` class from `src/utils/logger.ts`:
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
Logger.info('Message'); // Always shown
|
||||||
|
Logger.error('Error:', error); // Always shown
|
||||||
|
Logger.debug('Debug'); // Only when DEBUG=true
|
||||||
|
```
|
||||||
|
|
||||||
|
### Async/Class Patterns
|
||||||
|
|
||||||
|
- All async methods return `Promise<T>` with explicit return types
|
||||||
|
- Class order: private props -> constructor -> public methods -> private methods
|
||||||
|
- Lifecycle: `initialize()` -> operations -> `close()`
|
||||||
|
|
||||||
|
### Services Pattern
|
||||||
|
|
||||||
|
- Services receive `PrismaClient` via constructor (DI)
|
||||||
|
- Use `getOrCreate` for idempotent operations
|
||||||
|
- Never call Prisma directly from scrapers
|
||||||
|
|
||||||
|
### Database Patterns
|
||||||
|
|
||||||
|
- Upsert via composite unique `(externalId, storeId)`
|
||||||
|
- Batch processing: 50 items per batch
|
||||||
|
- Prices: Float (rubles), converted from kopecks
|
||||||
|
|
||||||
|
### Comments
|
||||||
|
|
||||||
|
- JSDoc for public methods, inline comments in Russian
|
||||||
|
|
||||||
|
```typescript
|
||||||
|
/** Инициализация сессии через Playwright */
|
||||||
|
async initialize(): Promise<void> { }
|
||||||
|
```
|
||||||
|
|
||||||
|
## Cursor Rules
|
||||||
|
|
||||||
|
### Requestly API Tests (`.requestly-supermarket/**/*.json`)
|
||||||
|
|
||||||
|
- Use `rq.test()` for tests, `rq.expect()` for assertions
|
||||||
|
- Access response via `rq.response.body` (parse as JSON)
|
||||||
|
- Prices in kopecks (24999 = 249.99 rubles)
|
||||||
|
|
||||||
|
See `.cursor/rules/requestly-test-rules.mdc` for full docs.
|
||||||
|
|
||||||
|
## Environment Variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
DATABASE_URL=postgresql://user:password@localhost:5432/supermarket
|
||||||
|
MAGNIT_STORE_CODE=992301
|
||||||
|
DEBUG=true
|
||||||
|
```
|
||||||
354
MCP_EXAMPLES.md
Normal file
354
MCP_EXAMPLES.md
Normal file
@@ -0,0 +1,354 @@
|
|||||||
|
# MCP Examples for Supermarket Scraper
|
||||||
|
|
||||||
|
This document contains example queries and prompts you can use with the Postgres MCP server to test and analyze your scraping results.
|
||||||
|
|
||||||
|
## Setup
|
||||||
|
|
||||||
|
### 1. Install Docker (if not already installed)
|
||||||
|
- Windows: [Docker Desktop](https://www.docker.com/products/docker-desktop/)
|
||||||
|
- macOS: [Docker Desktop for Mac](https://www.docker.com/products/docker-desktop/)
|
||||||
|
- Linux (Debian/Ubuntu): `sudo apt-get install docker.io`
|
||||||
|
|
||||||
|
### 2. Pull the Postgres MCP image
|
||||||
|
```bash
|
||||||
|
docker pull crystaldba/postgres-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Start your database and the MCP server
|
||||||
|
```bash
|
||||||
|
docker-compose up -d postgres postgres-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Configure Claude Code with MCP
|
||||||
|
|
||||||
|
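Copy `.mcp.json.example` to `.mcp.json` in the project root (the copy is git-ignored) so Claude Code can load it. For Claude Desktop, add the same `mcpServers` entry to its config file instead: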
|
||||||
|
|
||||||
|
| OS | Config File Location |
|
||||||
|
|----|---------------------|
|
||||||
|
| Windows | `%APPDATA%\Claude\claude_desktop_config.json` |
|
||||||
|
| macOS | `~/Library/Application Support/Claude/claude_desktop_config.json` |
|
||||||
|
| Linux | `~/.config/Claude/claude_desktop_config.json` |
|
||||||
|
|
||||||
|
Or via VSCode: `Settings` → `MCP` → `Configuration File`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Natural Language Prompts
|
||||||
|
|
||||||
|
You can ask the AI questions in natural language, and it will use Postgres MCP to query your database:
|
||||||
|
|
||||||
|
### Database Overview
|
||||||
|
- "What tables exist in the database?"
|
||||||
|
- "Show me the schema of the Product table"
|
||||||
|
- "What are the relationships between tables?"
|
||||||
|
- "Analyze the database health"
|
||||||
|
|
||||||
|
### Scraping Results
|
||||||
|
- "How many products are in the database?"
|
||||||
|
- "Show me products with the highest discounts"
|
||||||
|
- "Find products without categories"
|
||||||
|
- "What is the price distribution of products?"
|
||||||
|
- "Which stores have the most products?"
|
||||||
|
|
||||||
|
### Performance
|
||||||
|
- "Are there any slow queries?"
|
||||||
|
- "What indexes should I add to improve performance?"
|
||||||
|
- "Show me the database health report"
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## SQL Query Examples
|
||||||
|
|
||||||
|
You can also ask the AI to execute specific SQL queries using the MCP tools.
|
||||||
|
|
||||||
|
### 1. Basic Scraping Validation
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Total products count
|
||||||
|
SELECT COUNT(*) as total_products FROM "Product";
|
||||||
|
|
||||||
|
-- Products by store
|
||||||
|
SELECT s.name, COUNT(p.id) as product_count
|
||||||
|
FROM "Store" s
|
||||||
|
LEFT JOIN "Product" p ON s.id = p."storeId"
|
||||||
|
GROUP BY s.id, s.name;
|
||||||
|
|
||||||
|
-- Latest scraping session
|
||||||
|
SELECT * FROM "ScrapingSession"
|
||||||
|
ORDER BY "startedAt" DESC LIMIT 1;
|
||||||
|
|
||||||
|
-- All scraping sessions with status
|
||||||
|
SELECT
|
||||||
|
id,
|
||||||
|
"sourceType",
|
||||||
|
status,
|
||||||
|
"startedAt",
|
||||||
|
"finishedAt",
|
||||||
|
CASE
|
||||||
|
WHEN "finishedAt" IS NOT NULL
|
||||||
|
THEN EXTRACT(EPOCH FROM ("finishedAt" - "startedAt"))
|
||||||
|
ELSE NULL
|
||||||
|
END as duration_seconds
|
||||||
|
FROM "ScrapingSession"
|
||||||
|
ORDER BY "startedAt" DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 2. Category Analysis
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Products without categories
|
||||||
|
SELECT COUNT(*) FROM "Product" WHERE "categoryId" IS NULL;
|
||||||
|
|
||||||
|
-- Categories by product count
|
||||||
|
SELECT c.name, COUNT(p.id) as product_count
|
||||||
|
FROM "Category" c
|
||||||
|
LEFT JOIN "Product" p ON p."categoryId" = c.id
|
||||||
|
GROUP BY c.id, c.name
|
||||||
|
ORDER BY product_count DESC NULLS LAST
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Category hierarchy with counts
|
||||||
|
SELECT
|
||||||
|
c1.name as category,
|
||||||
|
c2.name as parent_category,
|
||||||
|
COUNT(p.id) as product_count
|
||||||
|
FROM "Category" c1
|
||||||
|
LEFT JOIN "Category" c2 ON c1."parentId" = c2.id
|
||||||
|
LEFT JOIN "Product" p ON p."categoryId" = c1.id
|
||||||
|
GROUP BY c1.id, c1.name, c2.name
|
||||||
|
ORDER BY product_count DESC;
|
||||||
|
|
||||||
|
-- Top-level categories (no parent)
|
||||||
|
SELECT c.name, COUNT(p.id) as product_count
|
||||||
|
FROM "Category" c
|
||||||
|
LEFT JOIN "Product" p ON p."categoryId" = c.id
|
||||||
|
WHERE c."parentId" IS NULL
|
||||||
|
GROUP BY c.id, c.name
|
||||||
|
ORDER BY product_count DESC;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 3. Price and Promotion Analysis
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Products with active discounts
|
||||||
|
SELECT
|
||||||
|
name,
|
||||||
|
"currentPrice",
|
||||||
|
"oldPrice",
|
||||||
|
"discountPercent",
|
||||||
|
"promotionEndDate"
|
||||||
|
FROM "Product"
|
||||||
|
WHERE "oldPrice" IS NOT NULL
|
||||||
|
AND ("promotionEndDate" IS NULL OR "promotionEndDate" > NOW())
|
||||||
|
ORDER BY "discountPercent" DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Expired promotions
|
||||||
|
SELECT
|
||||||
|
name,
|
||||||
|
"currentPrice",
|
||||||
|
"oldPrice",
|
||||||
|
"discountPercent",
|
||||||
|
"promotionEndDate"
|
||||||
|
FROM "Product"
|
||||||
|
WHERE "oldPrice" IS NOT NULL
|
||||||
|
AND "promotionEndDate" IS NOT NULL
|
||||||
|
AND "promotionEndDate" < NOW()
|
||||||
|
ORDER BY "promotionEndDate" DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Price distribution
|
||||||
|
SELECT
|
||||||
|
CASE
|
||||||
|
WHEN "currentPrice" < 100 THEN '0-100'
|
||||||
|
WHEN "currentPrice" < 500 THEN '100-500'
|
||||||
|
WHEN "currentPrice" < 1000 THEN '500-1000'
|
||||||
|
ELSE '1000+'
|
||||||
|
END as price_range,
|
||||||
|
COUNT(*) as count
|
||||||
|
FROM "Product"
|
||||||
|
GROUP BY price_range
|
||||||
|
ORDER BY MIN("currentPrice");
|
||||||
|
|
||||||
|
-- Most expensive products
|
||||||
|
SELECT name, "currentPrice", brand, unit
|
||||||
|
FROM "Product"
|
||||||
|
ORDER BY "currentPrice" DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Cheapest products
|
||||||
|
SELECT name, "currentPrice", brand, unit
|
||||||
|
FROM "Product"
|
||||||
|
WHERE "currentPrice" > 0
|
||||||
|
ORDER BY "currentPrice" ASC
|
||||||
|
LIMIT 20;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 4. Data Quality Checks
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Products missing critical fields
|
||||||
|
SELECT
|
||||||
|
COUNT(*) FILTER (WHERE name IS NULL OR name = '') as missing_name,
|
||||||
|
COUNT(*) FILTER (WHERE "categoryId" IS NULL) as missing_category,
|
||||||
|
COUNT(*) FILTER (WHERE brand IS NULL OR brand = '') as missing_brand,
|
||||||
|
COUNT(*) FILTER (WHERE "imageUrl" IS NULL OR "imageUrl" = '') as missing_image,
|
||||||
|
COUNT(*) FILTER (WHERE url IS NULL OR url = '') as missing_url,
|
||||||
|
COUNT(*) as total_products
|
||||||
|
FROM "Product";
|
||||||
|
|
||||||
|
-- Duplicate products check (same externalId for different stores)
|
||||||
|
SELECT "externalId", COUNT(*) as count
|
||||||
|
FROM "Product"
|
||||||
|
GROUP BY "externalId"
|
||||||
|
HAVING COUNT(*) > 1;
|
||||||
|
|
||||||
|
-- Products with strange prices (0 or negative)
|
||||||
|
SELECT name, "currentPrice", "oldPrice"
|
||||||
|
FROM "Product"
|
||||||
|
WHERE "currentPrice" <= 0 OR ("oldPrice" IS NOT NULL AND "oldPrice" <= 0)
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Products with impossible discounts
|
||||||
|
SELECT name, "currentPrice", "oldPrice", "discountPercent"
|
||||||
|
FROM "Product"
|
||||||
|
WHERE "discountPercent" < 0 OR "discountPercent" > 100
|
||||||
|
LIMIT 20;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 5. Rating Analysis
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Top rated products
|
||||||
|
SELECT
|
||||||
|
name,
|
||||||
|
rating,
|
||||||
|
"scoresCount",
|
||||||
|
"commentsCount",
|
||||||
|
brand
|
||||||
|
FROM "Product"
|
||||||
|
WHERE rating IS NOT NULL
|
||||||
|
ORDER BY rating DESC, "scoresCount" DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Most reviewed products
|
||||||
|
SELECT
|
||||||
|
name,
|
||||||
|
rating,
|
||||||
|
"scoresCount",
|
||||||
|
"commentsCount",
|
||||||
|
brand
|
||||||
|
FROM "Product"
|
||||||
|
WHERE "commentsCount" IS NOT NULL
|
||||||
|
ORDER BY "commentsCount" DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Products without ratings
|
||||||
|
SELECT COUNT(*) FROM "Product" WHERE rating IS NULL;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 6. Brand Analysis
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Top brands by product count
|
||||||
|
SELECT brand, COUNT(*) as product_count
|
||||||
|
FROM "Product"
|
||||||
|
WHERE brand IS NOT NULL AND brand != ''
|
||||||
|
GROUP BY brand
|
||||||
|
ORDER BY product_count DESC
|
||||||
|
LIMIT 20;
|
||||||
|
|
||||||
|
-- Average price by brand (for brands with 10+ products)
|
||||||
|
SELECT
|
||||||
|
brand,
|
||||||
|
COUNT(*) as product_count,
|
||||||
|
AVG("currentPrice") as avg_price,
|
||||||
|
MIN("currentPrice") as min_price,
|
||||||
|
MAX("currentPrice") as max_price
|
||||||
|
FROM "Product"
|
||||||
|
WHERE brand IS NOT NULL AND brand != ''
|
||||||
|
GROUP BY brand
|
||||||
|
HAVING COUNT(*) >= 10
|
||||||
|
ORDER BY product_count DESC
|
||||||
|
LIMIT 20;
|
||||||
|
```
|
||||||
|
|
||||||
|
### 7. Health Check Queries
|
||||||
|
|
||||||
|
```sql
|
||||||
|
-- Table sizes
|
||||||
|
SELECT
|
||||||
|
schemaname,
|
||||||
|
tablename,
|
||||||
|
pg_size_pretty(pg_total_relation_size(schemaname||'.'||tablename)) AS size
|
||||||
|
FROM pg_tables
|
||||||
|
WHERE schemaname = 'public'
|
||||||
|
ORDER BY pg_total_relation_size(schemaname||'.'||tablename) DESC;
|
||||||
|
|
||||||
|
-- Index usage
|
||||||
|
SELECT
|
||||||
|
schemaname,
|
||||||
|
relname AS tablename,
|
||||||
|
indexrelname AS indexname,
|
||||||
|
idx_scan,
|
||||||
|
idx_tup_read,
|
||||||
|
idx_tup_fetch
|
||||||
|
FROM pg_stat_user_indexes
|
||||||
|
ORDER BY idx_scan DESC;
|
||||||
|
|
||||||
|
-- Table row counts
|
||||||
|
SELECT
|
||||||
|
'Store' as table_name,
|
||||||
|
COUNT(*) as row_count
|
||||||
|
FROM "Store"
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'Category', COUNT(*) FROM "Category"
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'Product', COUNT(*) FROM "Product"
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'ScrapingSession', COUNT(*) FROM "ScrapingSession";
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## MCP Tools Reference
|
||||||
|
|
||||||
|
Postgres MCP provides these tools that the AI can use:
|
||||||
|
|
||||||
|
| Tool | Description |
|
||||||
|
|------|-------------|
|
||||||
|
| `list_schemas` | Lists all database schemas |
|
||||||
|
| `list_objects` | Lists tables, views, sequences in a schema |
|
||||||
|
| `get_object_details` | Gets table/column details |
|
||||||
|
| `execute_sql` | Executes SQL queries |
|
||||||
|
| `explain_query` | Shows query execution plan |
|
||||||
|
| `get_top_queries` | Reports slowest queries |
|
||||||
|
| `analyze_workload_indexes` | Recommends indexes for workload |
|
||||||
|
| `analyze_db_health` | Performs comprehensive health checks |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Example Workflow
|
||||||
|
|
||||||
|
Here's a typical workflow for testing scraping results:
|
||||||
|
|
||||||
|
1. **Start the database and the MCP server**:
|
||||||
|
```bash
|
||||||
|
docker-compose up -d postgres postgres-mcp
|
||||||
|
```
|
||||||
|
|
||||||
|
2. **Run the scraper**:
|
||||||
|
```bash
|
||||||
|
pnpm dev
|
||||||
|
```
|
||||||
|
|
||||||
|
3. **Ask the AI to verify**:
|
||||||
|
- "Check the database health"
|
||||||
|
- "How many products were scraped?"
|
||||||
|
- "Are there any products without categories?"
|
||||||
|
- "Show me the top 20 products by discount"
|
||||||
|
- "Find any data quality issues"
|
||||||
|
|
||||||
|
4. **Analyze performance**:
|
||||||
|
- "Are there any slow queries?"
|
||||||
|
- "Should I add any indexes?"
|
||||||
297
PROJECT.md
Normal file
297
PROJECT.md
Normal file
@@ -0,0 +1,297 @@
|
|||||||
|
# Supermarket Scraper System
|
||||||
|
|
||||||
|
## Обзор проекта
|
||||||
|
|
||||||
|
Система для скрапинга товаров из российских супермаркетов (Магнит, Пятёрочка/5ka) с хранением в PostgreSQL, векторным поиском через pgvector и интеграцией LLM для запросов на естественном языке.
|
||||||
|
|
||||||
|
**Текущий статус:** MVP Phase - Magnit API scraping functional
|
||||||
|
|
||||||
|
**Последнее обновление:** 2025-01-21
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Текущий статус (Progress Tracker)
|
||||||
|
|
||||||
|
### ✅ Завершено
|
||||||
|
|
||||||
|
| Этап | Название | Статус | Описание |
|
||||||
|
|------|----------|--------|----------|
|
||||||
|
| 1 | База данных + Prisma | ✅ Done | PostgreSQL, Prisma ORM, pgvector extension |
|
||||||
|
| 2 | Magnit API Scraper | ✅ Done | Hybrid Playwright+Axios, streaming mode, retry logic |
|
||||||
|
| - | Docker Infrastructure | ✅ Done | PostgreSQL + pgAdmin + CloudBeaver |
|
||||||
|
|
||||||
|
### ⏳ В работе
|
||||||
|
|
||||||
|
| Этап | Название | Статус | Следующий шаг |
|
||||||
|
|------|----------|--------|---------------|
|
||||||
|
| - | Debug скрипты → Тесты | ⏳ TODO | Превратить debug scripts в Vitest тесты |
|
||||||
|
|
||||||
|
### 📋 Запланировано
|
||||||
|
|
||||||
|
| Этап | Название | Приоритет | Зависимости |
|
||||||
|
|------|----------|-----------|-------------|
|
||||||
|
| 3 | Embeddings + Vector Search | High | Этап 1-2 |
|
||||||
|
| 4 | BaseApiScraper + 5ka API | High | Этап 2 |
|
||||||
|
| 5 | Web Scraping (Playwright) | Medium | Этап 4 |
|
||||||
|
| 6 | Android Scraping (Appium) | Low | Этап 5 |
|
||||||
|
| 7 | LangChain Integration | Medium | Этап 3 |
|
||||||
|
| 8 | REST API + Scheduler | Medium | Этап 7 |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Roadmap
|
||||||
|
|
||||||
|
### Этап 1: База данных и Prisma ✅
|
||||||
|
- [x] PostgreSQL с Docker
|
||||||
|
- [x] Prisma ORM setup
|
||||||
|
- [x] Модели: Store, Category, Product, ScrapingSession
|
||||||
|
- [x] pgvector extension (подготовлено для embeddings)
|
||||||
|
|
||||||
|
### Этап 2: Magnit API Scraping ✅
|
||||||
|
- [x] Hybrid approach: Playwright (сессия) + Axios (API запросы)
|
||||||
|
- [x] Обход 403 Forbidden с auto-reinit
|
||||||
|
- [x] Streaming mode для больших каталогов
|
||||||
|
- [x] Retry logic с exponential backoff
|
||||||
|
- [x] Rate limiting и защита от infinite loops
|
||||||
|
|
||||||
|
### Этап 3: Embeddings и Vector Search 📋
|
||||||
|
**Следующий приоритетный этап**
|
||||||
|
|
||||||
|
- [ ] Добавить `embedding vector(1536)` поля в Product и Category
|
||||||
|
- [ ] Создать миграцию pgvector
|
||||||
|
- [ ] EmbeddingService (OpenAI или Ollama)
|
||||||
|
- [ ] Векторный поиск товаров
|
||||||
|
|
||||||
|
### Этап 4: API Scraping - Полная реализация 📋
|
||||||
|
- [ ] BaseApiScraper с общим функционалом
|
||||||
|
- [ ] 5ka API scraper
|
||||||
|
- [ ] Category-based pagination
|
||||||
|
|
||||||
|
### Этап 5: Web и Android Scraping 📋
|
||||||
|
- [ ] MagnitWebScraper (Playwright fallback)
|
||||||
|
- [ ] 5kaWebScraper
|
||||||
|
- [ ] MagnitAppScraper (Appium)
|
||||||
|
- [ ] 5kaAppScraper
|
||||||
|
|
||||||
|
### Этап 6: LangChain Integration 📋
|
||||||
|
- [ ] SQL Agent для natural language запросов
|
||||||
|
- [ ] Query chains
|
||||||
|
- [ ] Пример: "найди самый дешевый попкорн"
|
||||||
|
|
||||||
|
### Этап 7: Scheduler и REST API 📋
|
||||||
|
- [ ] SchedulerService (node-cron)
|
||||||
|
- [ ] REST API server (Express/Fastify)
|
||||||
|
- [ ] Endpoints для n8n интеграции
|
||||||
|
- [ ] Job monitoring
|
||||||
|
|
||||||
|
### Этап 8: Price History Analytics 📋
|
||||||
|
- [ ] PriceHistory model (уже в схеме)
|
||||||
|
- [ ] Автоматический трекинг цен
|
||||||
|
- [ ] Уведомления об изменениях
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Архитектура
|
||||||
|
|
||||||
|
```
|
||||||
|
┌─────────────────────────────────────────────────────────────┐
|
||||||
|
│ Data Sources │
|
||||||
|
├──────────┬──────────────┬───────────────────────────────────┤
|
||||||
|
│ API │ Web Scrapers│ Android App Scrapers │
|
||||||
|
│ Scrapers │ (Playwright) │ (Appium) │
|
||||||
|
│ │ │ │
|
||||||
|
│ ✅ Magnit│ ⏳ magnit.ru │ ⏳ Магнит Android App │
|
||||||
|
│ ⏳ 5ka │ ⏳ 5ka.ru │ ⏳ 5ka Android App │
|
||||||
|
└──────────┴──────────────┴───────────────────────────────────┘
|
||||||
|
│
|
||||||
|
┌────────────▼────────────┐
|
||||||
|
│ Data Processing Layer │
|
||||||
|
│ - Parsing & Normalize │
|
||||||
|
│ - Embeddings Generation│
|
||||||
|
└────────────┬────────────┘
|
||||||
|
│
|
||||||
|
┌────────────▼────────────┐
|
||||||
|
│ PostgreSQL Database │
|
||||||
|
│ - pgvector extension │
|
||||||
|
│ - Products, Categories │
|
||||||
|
│ - Price History │
|
||||||
|
└────────────┬────────────┘
|
||||||
|
│
|
||||||
|
┌────────────▼────────────┐
|
||||||
|
│ LangChain + LLM │
|
||||||
|
│ - SQL Agent │
|
||||||
|
│ - Query Interface │
|
||||||
|
└─────────────────────────┘
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Стек технологий
|
||||||
|
|
||||||
|
| Категория | Технология |
|
||||||
|
|-----------|------------|
|
||||||
|
| Runtime | Node.js + TypeScript |
|
||||||
|
| Database | PostgreSQL 15+ + pgvector |
|
||||||
|
| ORM | Prisma |
|
||||||
|
| API Scraping | Axios + Playwright |
|
||||||
|
| Web Scraping | Playwright |
|
||||||
|
| Mobile | Appium + WebDriverIO |
|
||||||
|
| LLM | LangChain + OpenAI/Ollama |
|
||||||
|
| Scheduler | node-cron |
|
||||||
|
| HTTP Server | Express/Fastify |
|
||||||
|
| Testing | Vitest |
|
||||||
|
| Package Manager | pnpm |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Quick Start
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Установка зависимостей
|
||||||
|
pnpm install
|
||||||
|
|
||||||
|
# Установка браузеров для Playwright
|
||||||
|
pnpm exec playwright install chromium
|
||||||
|
|
||||||
|
# Настройка переменных окружения
|
||||||
|
cp .env.example .env
|
||||||
|
# Отредактируйте .env: DATABASE_URL, MAGNIT_STORE_CODE
|
||||||
|
|
||||||
|
# Запуск PostgreSQL
|
||||||
|
docker-compose up -d
|
||||||
|
|
||||||
|
# Генерация Prisma Client
|
||||||
|
pnpm prisma:generate
|
||||||
|
|
||||||
|
# Создание и применение миграций
|
||||||
|
pnpm prisma:migrate
|
||||||
|
|
||||||
|
# Запуск скрапинга
|
||||||
|
pnpm dev
|
||||||
|
|
||||||
|
# Доступ к DB Admin:
|
||||||
|
# pgAdmin: http://localhost:5050
|
||||||
|
# CloudBeaver: http://localhost:8978
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Конфигурация
|
||||||
|
|
||||||
|
### Переменные окружения (.env)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# Database
|
||||||
|
DATABASE_URL=postgresql://user:password@localhost:5432/supermarket
|
||||||
|
|
||||||
|
# Magnit Store
|
||||||
|
MAGNIT_STORE_CODE=992301
|
||||||
|
MAGNIT_STORE_TYPE=6
|
||||||
|
MAGNIT_CATALOG_TYPE=1
|
||||||
|
|
||||||
|
# Scraping Options
|
||||||
|
MAGNIT_USE_STREAMING=true # streaming mode (рекомендуется)
|
||||||
|
MAGNIT_PAGE_SIZE=50 # размер страницы API
|
||||||
|
MAGNIT_MAX_PRODUCTS= # лимит товаров (пусто = без лимита)
|
||||||
|
MAGNIT_RATE_LIMIT_DELAY=300 # задержка между запросами (ms)
|
||||||
|
MAGNIT_MAX_ITERATIONS=10000 # защита от бесконечного цикла
|
||||||
|
MAGNIT_HEADLESS=true # headless режим браузера
|
||||||
|
|
||||||
|
# Resilience
|
||||||
|
MAGNIT_RETRY_ATTEMPTS=3 # количество попыток retry
|
||||||
|
MAGNIT_REQUEST_TIMEOUT=30000 # timeout запросов (ms)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Структура проекта
|
||||||
|
|
||||||
|
```
|
||||||
|
supermarket/
|
||||||
|
├── src/
|
||||||
|
│ ├── config/
|
||||||
|
│ │ └── database.ts # DB connection & setup
|
||||||
|
│ ├── database/
|
||||||
|
│ │ ├── prisma/
|
||||||
|
│ │ │ └── schema.prisma # Prisma schema
|
||||||
|
│ │ └── client.ts # Prisma Client instance
|
||||||
|
│ ├── scrapers/
|
||||||
|
│ │ ├── api/
|
||||||
|
│ │ │ └── magnit/
|
||||||
|
│ │ │ ├── MagnitApiScraper.ts
|
||||||
|
│ │ │ └── types.ts
|
||||||
|
│ │ ├── web/ # TODO: Playwright scrapers
|
||||||
|
│ │ └── android/ # TODO: Appium scrapers
|
||||||
|
│ ├── services/
|
||||||
|
│ │ ├── product/
|
||||||
|
│ │ │ ├── ProductService.ts
|
||||||
|
│ │ │ └── ProductParser.ts
|
||||||
|
│ │ └── embeddings/ # TODO: EmbeddingService
|
||||||
|
│ ├── scripts/
|
||||||
|
│ │ └── scrape-magnit-products.ts
|
||||||
|
│ └── utils/
|
||||||
|
│ ├── logger.ts
|
||||||
|
│ ├── errors.ts
|
||||||
|
│ └── retry.ts
|
||||||
|
├── tests/ # TODO: Create tests
|
||||||
|
│ └── integration/
|
||||||
|
├── docker-compose.yml
|
||||||
|
├── package.json
|
||||||
|
├── README.md # Basic setup guide
|
||||||
|
├── CLAUDE.md # Instructions for Claude Code
|
||||||
|
├── PROJECT.md # This file - roadmap & status
|
||||||
|
└── .cursor/
|
||||||
|
└── plans/
|
||||||
|
└── supermarket_scraper_system_1af4ed29.plan.md
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Недавние изменения
|
||||||
|
|
||||||
|
### [9164527] feat: enhanced Magnit scraper with streaming mode and retry logic (2025-01-21)
|
||||||
|
|
||||||
|
- ✅ Streaming mode для memory-efficient больших каталогов
|
||||||
|
- ✅ Retry logic с exponential backoff
|
||||||
|
- ✅ Auto session reinitialization on 403 errors
|
||||||
|
- ✅ Configurable options (pageSize, maxProducts, rateLimitDelay)
|
||||||
|
- ✅ maxIterations защита от infinite loops
|
||||||
|
- ✅ retry.ts utility module
|
||||||
|
- ✅ Docker: pgAdmin + CloudBeaver
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Что дальше?
|
||||||
|
|
||||||
|
### Ближайшие задачи (Priority Order):
|
||||||
|
|
||||||
|
1. **Debug скрипты → Тесты** (в процессе)
|
||||||
|
- Превратить `analyze-category-nulls.ts`, `check-product-details.ts`, `inspect-api-response.ts` в Vitest тесты
|
||||||
|
- Настроить `tests/integration/` структуру
|
||||||
|
|
||||||
|
2. **Embeddings (Этап 3)**
|
||||||
|
- Добавить pgvector поля в schema
|
||||||
|
- Создать EmbeddingService
|
||||||
|
- Генерация embeddings для товаров
|
||||||
|
|
||||||
|
3. **5ka API Scraper (Этап 4)**
|
||||||
|
- BaseApiScraper abstraction
|
||||||
|
- 5ka API integration
|
||||||
|
|
||||||
|
4. **REST API + Scheduler (Этап 7)**
|
||||||
|
- HTTP server для внешних интеграций
|
||||||
|
- Cron-like планировщик
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Полезные ссылки
|
||||||
|
|
||||||
|
- **Cursor Plan**: [.cursor/plans/supermarket_scraper_system_1af4ed29.plan.md](.cursor/plans/supermarket_scraper_system_1af4ed29.plan.md) - Полная техническая спецификация
|
||||||
|
- **Prisma Schema**: [src/database/prisma/schema.prisma](src/database/prisma/schema.prisma)
|
||||||
|
- **Main Scraper**: [src/scrapers/api/magnit/MagnitApiScraper.ts](src/scrapers/api/magnit/MagnitApiScraper.ts)
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Лицензия
|
||||||
|
|
||||||
|
Private project
|
||||||
@@ -47,6 +47,19 @@ services:
|
|||||||
postgres:
|
postgres:
|
||||||
condition: service_healthy
|
condition: service_healthy
|
||||||
|
|
||||||
|
postgres-mcp:
|
||||||
|
image: crystaldba/postgres-mcp:latest
|
||||||
|
container_name: supermarket-postgres-mcp
|
||||||
|
restart: unless-stopped
|
||||||
|
environment:
|
||||||
|
DATABASE_URI: postgresql://user:password@postgres:5432/supermarket
|
||||||
|
ports:
|
||||||
|
- "8000:8000"
|
||||||
|
command: ["--access-mode=unrestricted", "--transport=sse"]
|
||||||
|
depends_on:
|
||||||
|
postgres:
|
||||||
|
condition: service_healthy
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
pgadmin_data:
|
pgadmin_data:
|
||||||
|
|||||||
158
docs/E2E_GUIDE.md
Normal file
158
docs/E2E_GUIDE.md
Normal file
@@ -0,0 +1,158 @@
|
|||||||
|
# Руководство по скрапингу товаров Магнит
|
||||||
|
|
||||||
|
## 📋 Обзор
|
||||||
|
|
||||||
|
Процесс состоит из двух этапов:
|
||||||
|
1. **Базовый скрапинг** - получение списка товаров через API поиска
|
||||||
|
2. **Обогащение деталями** - получение бренда, описания, веса для каждого товара
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🚀 Этап 1: Базовый скрапинг
|
||||||
|
|
||||||
|
### Что делает:
|
||||||
|
- Сканирует каталог товаров через API поиска
|
||||||
|
- Сохраняет базовую информацию: название, цена, рейтинг, изображение
|
||||||
|
- Сохраняет товары в базу данных с `isDetailsFetched = false`
|
||||||
|
|
||||||
|
### Запуск:
|
||||||
|
```bash
|
||||||
|
pnpm dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### Опционально: указать магазин
|
||||||
|
```bash
|
||||||
|
MAGNIT_STORE_CODE=992301 pnpm dev
|
||||||
|
```
|
||||||
|
|
||||||
|
### Результат:
|
||||||
|
- В таблице `Product` появляются записи с базовыми данными
|
||||||
|
- Поля `brand`, `description`, `weight`, `unit` пустые
|
||||||
|
- `isDetailsFetched = false`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ✨ Этап 2: Обогащение деталями
|
||||||
|
|
||||||
|
### Что делает:
|
||||||
|
- Получает товары с `isDetailsFetched = false`
|
||||||
|
- Для каждого товара запрашивает детали через специальный API endpoint
|
||||||
|
- Обновляет: бренд, описание, вес, единицу измерения
|
||||||
|
- Ставит `isDetailsFetched = true`
|
||||||
|
|
||||||
|
### Запуск:
|
||||||
|
```bash
|
||||||
|
pnpm enrich
|
||||||
|
```
|
||||||
|
|
||||||
|
### Опционально: указать магазин
|
||||||
|
```bash
|
||||||
|
MAGNIT_STORE_CODE=992301 pnpm enrich
|
||||||
|
```
|
||||||
|
|
||||||
|
### Результат:
|
||||||
|
- Поля `brand`, `description`, `weight`, `unit` заполнены
|
||||||
|
- Все товары имеют `isDetailsFetched = true`
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🔄 Полный цикл (последовательный)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
# 1. Базовый скрапинг
|
||||||
|
pnpm dev
|
||||||
|
|
||||||
|
# 2. Обогащение деталями
|
||||||
|
pnpm enrich
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🧪 Тестирование
|
||||||
|
|
||||||
|
### Проверить соединение с БД:
|
||||||
|
```bash
|
||||||
|
pnpm test-db
|
||||||
|
```
|
||||||
|
|
||||||
|
### Проверить detail endpoint:
|
||||||
|
```bash
|
||||||
|
pnpm test-detail-endpoint
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📊 Проверка результатов
|
||||||
|
|
||||||
|
### Через SQL:
|
||||||
|
```sql
|
||||||
|
-- Количество товаров
|
||||||
|
SELECT COUNT(*) FROM "Product";
|
||||||
|
|
||||||
|
-- Сколько обогащено
|
||||||
|
SELECT
|
||||||
|
COUNT(*) FILTER (WHERE "isDetailsFetched" = true) as enriched,
|
||||||
|
COUNT(*) FILTER (WHERE "isDetailsFetched" = false) as pending
|
||||||
|
FROM "Product";
|
||||||
|
|
||||||
|
-- Процент NULL полей
|
||||||
|
SELECT
|
||||||
|
'brand' as field,
|
||||||
|
ROUND(100.0 * SUM(CASE WHEN brand IS NULL THEN 1 ELSE 0 END) / COUNT(*), 2) as null_percent
|
||||||
|
FROM "Product"
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'description', ROUND(100.0 * SUM(CASE WHEN description IS NULL THEN 1 ELSE 0 END) / COUNT(*), 2) FROM "Product"
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'weight', ROUND(100.0 * SUM(CASE WHEN weight IS NULL THEN 1 ELSE 0 END) / COUNT(*), 2) FROM "Product"
|
||||||
|
UNION ALL
|
||||||
|
SELECT 'unit', ROUND(100.0 * SUM(CASE WHEN unit IS NULL THEN 1 ELSE 0 END) / COUNT(*), 2) FROM "Product";
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 🛠️ Управление миграциями
|
||||||
|
|
||||||
|
### Создать и применить миграцию:
|
||||||
|
```bash
|
||||||
|
pnpm prisma:migrate --name название_миграции
|
||||||
|
```
|
||||||
|
|
||||||
|
### Пересоздать Prisma Client:
|
||||||
|
```bash
|
||||||
|
pnpm prisma:generate
|
||||||
|
```
|
||||||
|
|
||||||
|
### Открыть Prisma Studio:
|
||||||
|
```bash
|
||||||
|
pnpm prisma:studio
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## ⚠️ Возможные проблемы
|
||||||
|
|
||||||
|
### Advisory lock при миграции
|
||||||
|
Если при миграции возникает ошибка `P1002`:
|
||||||
|
```bash
|
||||||
|
# Перезапустить PostgreSQL контейнер
|
||||||
|
docker restart supermarket-postgres
|
||||||
|
```
|
||||||
|
|
||||||
|
### 403 Forbidden при скрапинге
|
||||||
|
Скрапер автоматически переинициализирует сессию. Проверьте логи.
|
||||||
|
|
||||||
|
### Пустые результаты
|
||||||
|
Проверьте, что магазин с указанным `MAGNIT_STORE_CODE` существует.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## 📝 Доступные команды
|
||||||
|
|
||||||
|
| Команда | Описание |
|
||||||
|
|---------|----------|
|
||||||
|
| `pnpm dev` | Базовый скрапинг товаров |
|
||||||
|
| `pnpm enrich` | Обогащение деталями |
|
||||||
|
| `pnpm test-db` | Проверка соединения с БД |
|
||||||
|
| `pnpm test-detail-endpoint` | Тест detail API |
|
||||||
|
| `pnpm type-check` | Проверка типов TypeScript |
|
||||||
|
| `pnpm prisma:studio` | GUI для работы с БД |
|
||||||
42
experiments/README.md
Normal file
42
experiments/README.md
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# Experiments Directory
|
||||||
|
|
||||||
|
This directory contains archived debug and experimental scripts used during development.
|
||||||
|
|
||||||
|
## Structure
|
||||||
|
|
||||||
|
### `magnit-detail-endpoints/`
|
||||||
|
Scripts used to discover and test Magnit API endpoints for product details:
|
||||||
|
- `debug-detail-response.ts` - Debug API response structure and field parsing
|
||||||
|
- `test-detail-endpoint.ts` - Test specific detail endpoints using MagnitApiScraper
|
||||||
|
- `test-all-detail-endpoints.ts` - Test multiple endpoints for product details
|
||||||
|
- `test-object-reviews-endpoint.ts` - Test user-reviews-and-object-info endpoint
|
||||||
|
- `find-product-detail-api.ts` - Find correct API endpoints for product details
|
||||||
|
- `find-product-detail-endpoint-v1.ts` - Find v1 API endpoints for product details
|
||||||
|
|
||||||
|
**Purpose**: These scripts were used to reverse-engineer the Magnit product detail API during the enrichment feature development.
|
||||||
|
|
||||||
|
### `html-extraction/`
|
||||||
|
Scripts for web scraping fallback exploration:
|
||||||
|
- `extract-product-from-html.ts` - Extract product data from HTML for future Playwright-based web scraping
|
||||||
|
|
||||||
|
**Purpose**: Experimental code for planned web scraping functionality (Phase 5).
|
||||||
|
|
||||||
|
## Usage
|
||||||
|
|
||||||
|
These scripts are **not production code** and are kept for reference. They may:
|
||||||
|
- Use DOM APIs (`document`, `window`) that require browser context
|
||||||
|
- Have hard-coded test data
|
||||||
|
- Be one-off experiments that are no longer maintained
|
||||||
|
|
||||||
|
To run these scripts, you may need to adjust `tsconfig.json` to include DOM types:
|
||||||
|
```json
|
||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"lib": ["ES2023", "dom"]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
## Status
|
||||||
|
|
||||||
|
Archived - No longer actively maintained. Kept for historical reference and potential future use.
|
||||||
116
experiments/html-extraction/extract-product-from-html.ts
Normal file
116
experiments/html-extraction/extract-product-from-html.ts
Normal file
@@ -0,0 +1,116 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { chromium } from 'playwright';
|
||||||
|
import * as fs from 'fs';
|
||||||
|
import { Logger } from '../../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Experimental probe: opens a hard-coded Magnit product page in headless
 * Chromium and reports where brand / description / weight data can be found
 * (microdata meta tags, JSON-LD, `window.__NUXT__`, inline scripts and
 * data attributes).
 *
 * The collected summary is logged as JSON and the rendered HTML is written to
 * `temp-product-page.html` in the current working directory.
 *
 * The browser is closed in a `finally` block so that a navigation timeout or
 * an evaluation error does not leave a Chromium process behind.
 *
 * NOTE(review): this file lives in `experiments/html-extraction/`, so the
 * `../../utils/logger.js` import resolves to the repository root — verify the
 * path still points at the project logger.
 */
async function main(): Promise<void> {
  Logger.info('=== Извлечение данных о товаре из HTML ===\n');

  const browser = await chromium.launch({ headless: true });

  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    const productUrl = 'https://magnit.ru/product/1000233138-podguzniki_la_fresh_dlya_vzroslykh_l_10sht?shopCode=992301&shopType=6';

    Logger.info(`Загружаю страницу: ${productUrl}`);

    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 20000,
    });

    // Give client-side rendering a moment to populate the page.
    await page.waitForTimeout(3000);

    // Runs inside the browser: keep it free of helper functions and outer
    // references, because the callback is serialized before execution.
    const productData = await page.evaluate(() => {
      const result: Record<string, unknown> = {
        title: document.querySelector('h1')?.textContent?.trim() || '',
      };

      // 1. Microdata <meta> tags.
      const metaBrand = document.querySelector<HTMLMetaElement>('meta[itemprop="brand"]')?.content;
      const metaDesc = document.querySelector<HTMLMetaElement>('meta[itemprop="description"]')?.content;
      const metaWeight = document.querySelector<HTMLMetaElement>('meta[itemprop="weight"]')?.content;

      // 2. JSON-LD structured data: keep the first Product entry.
      const jsonLdScripts = Array.from(document.querySelectorAll('script[type="application/ld+json"]'));
      for (const script of jsonLdScripts) {
        try {
          const json = JSON.parse(script.textContent || '');
          if (json['@type'] === 'Product' || json.name === 'Product') {
            result.jsonLd = json;
            break;
          }
        } catch {
          // Best effort: a malformed JSON-LD block is simply skipped.
        }
      }

      // 3. Nuxt hydration payload exposed on window.
      const nuxtData = (window as any).__NUXT__;
      if (nuxtData) {
        result.nuxtKeys = Object.keys(nuxtData);
        // Report the first top-level key whose serialized value mentions
        // one of the fields we are looking for.
        for (const key of Object.keys(nuxtData)) {
          const val = nuxtData[key];
          if (val && typeof val === 'object') {
            const str = JSON.stringify(val);
            if (str.includes('brand') || str.includes('description') || str.includes('weight')) {
              result.nuxtDataKey = key;
              result.nuxtDataPreview = str.substring(0, 500);
              break;
            }
          }
        }
      }

      // 4. Any other inline script of a plausible size that mentions "brand".
      const allScripts = Array.from(document.querySelectorAll('script'));
      for (const script of allScripts) {
        const text = script.textContent || '';
        if (!text.includes('"brand"') || text.length <= 100 || text.length >= 100000) {
          continue;
        }

        // Greedy match from the first "{" to the last "}" in the script.
        const match = text.match(/\{[\s\S]*\}/);
        if (!match) {
          continue;
        }

        try {
          const json = JSON.parse(match[0]);
          if (json.brand || json.description || json.weight) {
            result.foundInScript = true;
            result.scriptDataPreview = JSON.stringify(json).substring(0, 500);
            break;
          }
        } catch {
          // Best effort: the matched text is frequently not valid JSON.
        }
      }

      // 5. Elements carrying product data attributes.
      const productElement = document.querySelector('[data-product-id], [data-product], [id*="product"]');
      if (productElement) {
        result.productElement = productElement.outerHTML.substring(0, 500);
      }

      // 6. Values collected from the meta tags in step 1.
      result.structuredData = {
        metaBrand,
        metaDesc,
        metaWeight,
      };

      return result;
    });

    Logger.info('=== РЕЗУЛЬТАТЫ ===\n');
    Logger.info(JSON.stringify(productData, null, 2));

    // Keep the rendered HTML on disk for offline analysis.
    const html = await page.content();
    const outputPath = 'temp-product-page.html';
    fs.writeFileSync(outputPath, html, 'utf-8');
    Logger.info(`\nHTML сохранен в: ${outputPath}`);
  } finally {
    await browser.close();
  }
}

// Report failures explicitly instead of leaving a floating promise.
main().catch((error: unknown) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});
|
||||||
81
experiments/magnit-detail-endpoints/debug-detail-response.ts
Normal file
81
experiments/magnit-detail-endpoints/debug-detail-response.ts
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { chromium } from 'playwright';
|
||||||
|
import axios from 'axios';
|
||||||
|
import { Logger } from '../../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Debug helper: fetches the `/webgate/v2/goods/{id}/stores/{store}` response
 * for one hard-coded product, prints it in full and then annotates the first
 * 15 entries of its `details` array with the field each one appears to hold
 * (brand, description, weight, unit).
 *
 * Session cookies and the `mg_udi` device id are obtained by opening
 * magnit.ru in headless Chromium; the browser is always closed, even when
 * navigation fails.
 */
async function main(): Promise<void> {
  Logger.info('=== Debug: Смотрим что возвращает API ===\n');

  let cookieStr = '';
  let deviceId = '';

  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });

    const cookies = await context.cookies();
    cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
    deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';
  } finally {
    await browser.close();
  }

  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  // Product taken from earlier logs: a Durex item that had a brand value.
  const productId = '1000201813';
  const endpoint = `/webgate/v2/goods/${productId}/stores/992301?storetype=2&catalogtype=1`;

  Logger.info(`Запрос: ${endpoint}`);

  try {
    const response = await httpClient.get(endpoint);
    Logger.info(`Status: ${response.status}\n`);

    const data = response.data;
    console.log(JSON.stringify(data, null, 2));

    // Walk the details array and flag entries matching the fields we parse.
    if (data.details && data.details.length > 0) {
      Logger.info(`\n=== АНАЛИЗ details массива (${data.details.length} элементов) ===\n`);

      const limit = Math.min(data.details.length, 15);
      for (let i = 0; i < limit; i++) {
        const detail = data.details[i];
        Logger.info(`${i + 1}. name: "${detail.name}" | value: "${detail.value}"`);

        // An entry without a name must not abort the whole dump.
        const name = String(detail.name ?? '').toLowerCase();

        if (name.includes('бренд') || name === 'brand') {
          Logger.info(` → Это БРЕНД!`);
        } else if (name.includes('описание') || name === 'description') {
          Logger.info(` → Это ОПИСАНИЕ!`);
        } else if (name.includes('вес') || name.includes('weight')) {
          Logger.info(` → Это ВЕС!`);
        } else if (name.includes('единица') || name.includes('unit')) {
          Logger.info(` → Это ЕДИНИЦА!`);
        }
      }
    }

    if (data.categories && data.categories.length > 0) {
      Logger.info(`\nCategories: ${data.categories.join(', ')}`);
    }
  } catch (error: any) {
    Logger.error('Ошибка:', error.message);
  }
}

// Report failures explicitly instead of leaving a floating promise.
main().catch((error: unknown) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});
|
||||||
149
experiments/magnit-detail-endpoints/find-product-detail-api.ts
Normal file
149
experiments/magnit-detail-endpoints/find-product-detail-api.ts
Normal file
@@ -0,0 +1,149 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { chromium } from 'playwright';
|
||||||
|
import axios from 'axios';
|
||||||
|
import { Logger } from '../../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Method 1: probes a list of candidate `/webgate/v2/...` URLs with plain GET
 * requests and stops at the first one that answers successfully.
 *
 * Session cookies and the `mg_udi` device id are obtained by opening
 * magnit.ru in headless Chromium. The browser is closed in a `finally` block
 * so a failed navigation does not leave a Chromium process running.
 *
 * The store code comes from `MAGNIT_STORE_CODE`, defaulting to `992301`.
 */
async function findDetailApiViaDirectRequest(): Promise<void> {
  Logger.info('=== МЕТОД 1: Прямой GET запрос к API ===\n');

  const productId = '1000233138';
  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';

  let cookieStr = '';
  let deviceId = '';

  // Step 1: collect cookies through Playwright.
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });

    const cookies = await context.cookies();
    cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
    deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';
  } finally {
    await browser.close();
  }

  // Step 2: try the candidate endpoints with those cookies.
  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  const endpoints = [
    `/webgate/v2/goods/${productId}?storeCode=${storeCode}&storeType=6`,
    `/webgate/v2/goods/${productId}?shopCode=${storeCode}&shopType=6`,
    `/webgate/v2/products/${productId}?storeCode=${storeCode}`,
    `/webgate/v2/catalog/product/${productId}?storeCode=${storeCode}`,
  ];

  for (const endpoint of endpoints) {
    try {
      Logger.info(`Пробую: GET ${endpoint}`);
      const response = await httpClient.get(endpoint);
      Logger.info(`✅ Status: ${response.status}`);

      if (response.data) {
        // Print small payloads in full, large ones as a 500-character preview.
        const json = JSON.stringify(response.data);
        if (json.length < 2000) {
          Logger.info(`Response: ${json}`);
        } else {
          Logger.info(`Response (preview): ${json.substring(0, 500)}...`);
        }
      }

      break; // First successful endpoint wins.
    } catch (error: any) {
      const status = error.response?.status;
      if (status === 404) {
        Logger.info(` ❌ 404 Not Found`);
      } else if (status === 403) {
        Logger.info(` ❌ 403 Forbidden`);
      } else {
        Logger.info(` ❌ ${error.message}`);
      }
    }
  }
}
|
||||||
|
|
||||||
|
/**
 * Method 2: loads a hard-coded product page and looks for product data that
 * is embedded in the HTML — first in `window.__NUXT__`, then in
 * `<script type="application/json">` tags.
 *
 * Afterwards it logs the HTML size and how many times the `"brand"`,
 * `"description"` and `"weight"` keys occur in the page source. The log
 * message reads "N times", so occurrences are counted rather than printing
 * the boolean returned by `String.prototype.includes`.
 *
 * Errors are logged, not rethrown; the browser is always closed.
 */
async function extractFromSSR(): Promise<void> {
  Logger.info('\n=== МЕТОД 2: Извлечение данных из SSR HTML ===\n');

  // Number of non-overlapping occurrences of a non-empty needle.
  const countOccurrences = (haystack: string, needle: string): number =>
    haystack.split(needle).length - 1;

  const browser = await chromium.launch({ headless: true });
  const context = await browser.newContext();
  const page = await context.newPage();

  const productUrl = 'https://magnit.ru/product/1000233138-podguzniki_la_fresh_dlya_vzroslykh_l_10sht?shopCode=992301&shopType=6';

  Logger.info(`Загружаю страницу (без networkidle): ${productUrl}`);

  try {
    await page.goto(productUrl, {
      waitUntil: 'domcontentloaded',
      timeout: 15000,
    });

    // Short pause to let client-side rendering finish.
    await page.waitForTimeout(2000);

    // Runs inside the browser: keep it free of helper functions and outer
    // references, because the callback is serialized before execution.
    const productData = await page.evaluate(() => {
      // Preferred source: the Nuxt hydration payload.
      const nuxtData = (window as any).__NUXT__;
      if (nuxtData) {
        return {
          source: '__NUXT__',
          keys: Object.keys(nuxtData),
          // Product data is looked up under either `data` or `pinia`.
          data: nuxtData.data || nuxtData.pinia || null,
        };
      }

      // Fallback: JSON blobs embedded in script tags.
      const scripts = Array.from(document.querySelectorAll('script[type="application/json"]'));
      for (const script of scripts) {
        try {
          const json = JSON.parse(script.textContent || '');
          if (json.product || json.goods || json.data?.product) {
            return { source: 'application/json script', data: json };
          }
        } catch {
          // Best effort: a script that is not valid JSON is skipped.
        }
      }

      return { found: false };
    });

    if (productData.source) {
      Logger.info(`✅ Данные найдены в: ${productData.source}`);
      Logger.info(`Ключи: ${JSON.stringify(productData.keys || Object.keys(productData.data || {}))}`);
      if (productData.data) {
        Logger.info(`Данные (превью): ${JSON.stringify(productData.data).substring(0, 1000)}...`);
      }
    } else {
      Logger.info(`❌ Данные не найдены`);
    }

    // Summarize the raw HTML as well.
    const html = await page.content();
    Logger.info(`\nHTML размер: ${html.length} символов`);
    Logger.info(`HTML содержит "brand": ${countOccurrences(html, '"brand"')} раз`);
    Logger.info(`HTML содержит "description": ${countOccurrences(html, '"description"')} раз`);
    Logger.info(`HTML содержит "weight": ${countOccurrences(html, '"weight"')} раз`);
  } catch (error) {
    Logger.error(`Ошибка: ${error}`);
  } finally {
    await browser.close();
  }
}
|
||||||
|
|
||||||
|
/**
 * Script entry point: runs the direct-request probe first and the SSR HTML
 * extraction second, strictly in sequence.
 */
async function main(): Promise<void> {
  await findDetailApiViaDirectRequest();
  await extractFromSSR();
}

// Report failures explicitly instead of leaving a floating promise; the
// direct-request probe does not catch browser errors itself.
main().catch((error: unknown) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});
|
||||||
@@ -0,0 +1,93 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { chromium } from 'playwright';
|
||||||
|
import axios from 'axios';
|
||||||
|
import { Logger } from '../../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Probes a list of candidate `/webgate/v1/...` URLs looking for one that
 * returns product details (brand, description, weight or unit) for a
 * hard-coded product id.
 *
 * Small 200 responses (under 3000 characters) are printed in full. Larger
 * ones are inspected for detail fields, and the search stops at the first
 * response that has any of them.
 *
 * Session cookies and the `mg_udi` device id are obtained by opening
 * magnit.ru in headless Chromium; the browser is always closed, even when
 * navigation fails.
 */
async function main(): Promise<void> {
  Logger.info('=== Поиск endpoint для ДЕТАЛЕЙ товара ===\n');

  let cookieStr = '';
  let deviceId = '';

  // Collect cookies.
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });

    const cookies = await context.cookies();
    cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
    deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';
  } finally {
    await browser.close();
  }

  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  const productId = '1000233138';

  // Candidate endpoints for product details.
  const endpoints = [
    // v1 API (the reviews endpoint was found under v1)
    `/webgate/v1/goods/${productId}?storeCode=992301&storeType=6`,
    `/webgate/v1/products/${productId}?storeCode=992301`,
    `/webgate/v1/catalog/product/${productId}?storeCode=992301`,
    `/webgate/v1/listing/goods/${productId}?storeCode=992301`,
    `/webgate/v1/listing/product/${productId}?storeCode=992301`,
    // Other variants
    `/webgate/v1/products/detail/${productId}?storeCode=992301`,
    `/webgate/v1/object?productId=${productId}&storeCode=992301`,
    `/webgate/v1/item/${productId}?storeCode=992301`,
  ];

  for (const endpoint of endpoints) {
    try {
      Logger.info(`Пробую: ${endpoint}`);
      const response = await httpClient.get(endpoint);

      if (response.status !== 200) {
        continue;
      }

      Logger.info(`✅ Status: ${response.status}`);

      const json = JSON.stringify(response.data);
      if (json.length < 3000) {
        Logger.info(`Response:\n${json}`);
        continue;
      }

      // Large payload: check whether it carries any of the wanted fields.
      const data = response.data;
      const hasDetails = data.brand || data.description || data.weight || data.unit;

      if (hasDetails) {
        Logger.info(`=== НАЙДЕНЫ ДЕТАЛИ ТОВАРА ===`);
        Logger.info(`brand: ${data.brand}`);
        Logger.info(`description: ${data.description?.substring(0, 100)}`);
        Logger.info(`weight: ${data.weight}`);
        Logger.info(`unit: ${data.unit}`);
        break;
      }

      Logger.info(`Response без деталей товара (preview):`);
      Logger.info(json.substring(0, 500) + '...');
    } catch (error: any) {
      const status = error.response?.status;
      if (status === 404) {
        Logger.info(` ❌ 404 Not Found`);
      } else if (status === 403) {
        Logger.info(` ❌ 403 Forbidden`);
      } else {
        Logger.info(` ❌ ${error.message}`);
      }
    }
  }
}

// Report failures explicitly instead of leaving a floating promise.
main().catch((error: unknown) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});
|
||||||
@@ -0,0 +1,84 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { chromium } from 'playwright';
|
||||||
|
import axios from 'axios';
|
||||||
|
import { Logger } from '../../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Calls three known Magnit endpoints (object info with reviews, promotions,
 * goods-per-store) with hard-coded ids and reports, for each one, the status,
 * a response preview and whether the payload has brand / description /
 * weight / unit fields at the top level or under `objectInfo` / `product`.
 *
 * Session cookies and the `mg_udi` device id are obtained by opening
 * magnit.ru in headless Chromium; the browser is always closed, even when
 * navigation fails.
 */
async function main(): Promise<void> {
  Logger.info('=== Тестирование всех endpoints для деталей товара ===\n');

  let cookieStr = '';
  let deviceId = '';

  // Collect cookies.
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });

    const cookies = await context.cookies();
    cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
    deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';
  } finally {
    await browser.close();
  }

  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  const endpoints = [
    {
      name: '🔍 user-reviews-and-object-info (ДЕТАЛИ ТОВАРА)',
      url: '/webgate/v1/listing/user-reviews-and-object-info?service=dostavka&objectType=product&objectId=1000530495',
    },
    {
      name: '🏷️ promotions/type',
      url: '/webgate/v1/promotions/type?adult=true&type=19&limit=10&storeCode=996609',
    },
    {
      name: '🏪 goods/{id}/stores/{storeCode}',
      url: '/webgate/v2/goods/1000530495/stores/996609?storetype=2&catalogtype=1',
    },
  ];

  for (const { name, url } of endpoints) {
    try {
      Logger.info(`\n${name}`);
      Logger.info(`URL: ${url}`);

      const response = await httpClient.get(url);
      Logger.info(`✅ Status: ${response.status}`);

      const data = response.data;
      const json = JSON.stringify(data, null, 2);

      // Print small payloads in full, large ones as an 800-character preview.
      if (json.length < 2000) {
        Logger.info(`Response:\n${json}`);
      } else {
        Logger.info(`Response (превью):`);
        console.log(json.substring(0, 800) + '...');
      }

      // Look for detail fields at the top level and in known wrappers.
      const hasDetails =
        data.brand || data.description || data.weight || data.unit ||
        data.objectInfo?.brand || data.objectInfo?.description ||
        data.product?.brand || data.product?.description;

      if (hasDetails) {
        Logger.info(`\n⭐️⭐️ НАЙДЕНЫ ДЕТАЛИ ТОВАРА! ⭐️⭐️`);
      }
    } catch (error: any) {
      Logger.info(`❌ Error: ${error.response?.status || error.message}`);
    }
  }
}

// Report failures explicitly instead of leaving a floating promise.
main().catch((error: unknown) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});
|
||||||
55
experiments/magnit-detail-endpoints/test-detail-endpoint.ts
Normal file
55
experiments/magnit-detail-endpoints/test-detail-endpoint.ts
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { MagnitApiScraper } from '../../scrapers/api/magnit/MagnitApiScraper.js';
|
||||||
|
import { Logger } from '../../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Initializes a `MagnitApiScraper` and tries several candidate
 * `/webgate/v2/...` URLs for a hard-coded product id through the scraper's
 * HTTP client, stopping at the first one that succeeds.
 *
 * Configuration comes from `MAGNIT_STORE_CODE`, `MAGNIT_STORE_TYPE`,
 * `MAGNIT_CATALOG_TYPE` and `MAGNIT_HEADLESS`, with the defaults below.
 *
 * On failure the exit code is set through `process.exitCode` rather than
 * `process.exit(1)`: exiting inside `catch` would end the process before the
 * `finally` block runs, so the scraper would never be closed.
 *
 * NOTE(review): this file lives in `experiments/magnit-detail-endpoints/`, so
 * the `../../scrapers/...` and `../../utils/...` imports resolve to the
 * repository root — verify the paths still point at the `src/` modules.
 */
async function main(): Promise<void> {
  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';
  const productId = '1000233138'; // Taken from the inspect script.

  const scraper = new MagnitApiScraper({
    storeCode,
    storeType: process.env.MAGNIT_STORE_TYPE || '6',
    catalogType: process.env.MAGNIT_CATALOG_TYPE || '1',
    headless: process.env.MAGNIT_HEADLESS !== 'false',
  });

  try {
    await scraper.initialize();

    Logger.info(`Попытка получить детали товара ${productId}...\n`);

    // Candidate endpoints for product details.
    const endpoints = [
      `/webgate/v2/goods/${productId}`,
      `/webgate/v2/products/${productId}`,
      `/webgate/v2/catalog/product/${productId}`,
      `/webgate/v2/goods/detail/${productId}`,
    ];

    for (const endpoint of endpoints) {
      try {
        Logger.info(`Пробую: ${endpoint}`);
        // The HTTP client is reached through an `any` cast, presumably
        // because it is not public on the scraper.
        const response = await (scraper as any).httpClient.get(endpoint);
        Logger.info(`✅ Успех! Status: ${response.status}`);
        Logger.info(JSON.stringify(response.data, null, 2));
        break; // First successful endpoint wins.
      } catch (error: any) {
        const status = error.response?.status;
        if (status === 404) {
          Logger.info(` ❌ 404 Not Found`);
        } else if (status === 403) {
          Logger.info(` ❌ 403 Forbidden (нужна аутентификация)`);
        } else {
          Logger.info(` ❌ ${status || error.message}`);
        }
      }
    }
  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Defer the non-zero exit so the cleanup in `finally` still runs.
    process.exitCode = 1;
  } finally {
    await scraper.close();
  }
}

// A failure while closing the scraper would otherwise be a floating rejection.
main().catch((error: unknown) => {
  Logger.error('❌ Ошибка:', error);
  process.exitCode = 1;
});
|
||||||
@@ -0,0 +1,55 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { chromium } from 'playwright';
|
||||||
|
import axios from 'axios';
|
||||||
|
import { Logger } from '../../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Requests the `/webgate/v1/listing/object-reviews` endpoint for a hard-coded
 * product and prints the raw JSON response. On failure it logs the error
 * message and, when the server answered, the status and body.
 *
 * Session cookies and the `mg_udi` device id are obtained by opening
 * magnit.ru in headless Chromium; the browser is always closed, even when
 * navigation fails.
 */
async function main(): Promise<void> {
  Logger.info('=== Тестирование API endpoint для деталей товара ===\n');

  let cookieStr = '';
  let deviceId = '';

  // Collect cookies through Playwright.
  const browser = await chromium.launch({ headless: true });
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    await page.goto('https://magnit.ru/', { waitUntil: 'domcontentloaded' });

    const cookies = await context.cookies();
    cookieStr = cookies.map(c => `${c.name}=${c.value}`).join('; ');
    deviceId = cookies.find(c => c.name === 'mg_udi')?.value || '';
  } finally {
    await browser.close();
  }

  // Build the HTTP client with the captured session.
  const httpClient = axios.create({
    baseURL: 'https://magnit.ru',
    headers: {
      'Content-Type': 'application/json',
      'Accept': '*/*',
      'Cookie': cookieStr,
      'x-device-id': deviceId,
      'x-client-name': 'magnit',
      'x-device-platform': 'Web',
      'x-new-magnit': 'true',
    },
  });

  // Endpoint under test (found manually).
  const endpoint = '/webgate/v1/listing/object-reviews?service=dostavka&objectId=1000530495&objectType=product&page=0&size=10';

  try {
    Logger.info(`Запрос: ${endpoint}`);
    const response = await httpClient.get(endpoint);
    Logger.info(`✅ Status: ${response.status}`);
    Logger.info(`\n=== ОТВЕТ API ===`);
    console.log(JSON.stringify(response.data, null, 2));
  } catch (error: any) {
    Logger.error(`Ошибка: ${error.message}`);
    if (error.response) {
      Logger.error(`Status: ${error.response.status}`);
      Logger.error(`Data:`, error.response.data);
    }
  }
}

// Report failures explicitly instead of leaving a floating promise.
main().catch((error: unknown) => {
  Logger.error('Ошибка:', error);
  process.exitCode = 1;
});
|
||||||
@@ -8,6 +8,7 @@
|
|||||||
"build": "tsc",
|
"build": "tsc",
|
||||||
"type-check": "tsc --noEmit",
|
"type-check": "tsc --noEmit",
|
||||||
"dev": "tsx src/scripts/scrape-magnit-products.ts",
|
"dev": "tsx src/scripts/scrape-magnit-products.ts",
|
||||||
|
"enrich": "tsx src/scripts/enrich-product-details.ts",
|
||||||
"test-db": "tsx src/scripts/test-db-connection.ts",
|
"test-db": "tsx src/scripts/test-db-connection.ts",
|
||||||
"prisma:generate": "prisma generate",
|
"prisma:generate": "prisma generate",
|
||||||
"prisma:migrate": "prisma migrate dev",
|
"prisma:migrate": "prisma migrate dev",
|
||||||
|
|||||||
@@ -1,5 +1,5 @@
|
|||||||
import 'dotenv/config'
|
import 'dotenv/config'
|
||||||
import { defineConfig, env } from 'prisma/config'
|
import { defineConfig } from 'prisma/config'
|
||||||
|
|
||||||
export default defineConfig({
|
export default defineConfig({
|
||||||
schema: 'src/database/prisma/schema.prisma',
|
schema: 'src/database/prisma/schema.prisma',
|
||||||
@@ -7,6 +7,6 @@ export default defineConfig({
|
|||||||
path: 'src/database/prisma/migrations',
|
path: 'src/database/prisma/migrations',
|
||||||
},
|
},
|
||||||
datasource: {
|
datasource: {
|
||||||
url: env('DATABASE_URL'),
|
url: process.env.DATABASE_URL,
|
||||||
},
|
},
|
||||||
})
|
})
|
||||||
@@ -8,7 +8,6 @@ generator client {
|
|||||||
|
|
||||||
datasource db {
|
datasource db {
|
||||||
provider = "postgresql"
|
provider = "postgresql"
|
||||||
url = env("DATABASE_URL")
|
|
||||||
}
|
}
|
||||||
|
|
||||||
model Store {
|
model Store {
|
||||||
@@ -73,6 +72,9 @@ model Product {
|
|||||||
quantity Int? // остаток на складе
|
quantity Int? // остаток на складе
|
||||||
badges String? // массив бейджей в формате JSON
|
badges String? // массив бейджей в формате JSON
|
||||||
|
|
||||||
|
// Детальная информация
|
||||||
|
isDetailsFetched Boolean @default(false) // были ли получены детали через detail endpoint
|
||||||
|
|
||||||
createdAt DateTime @default(now())
|
createdAt DateTime @default(now())
|
||||||
updatedAt DateTime @updatedAt
|
updatedAt DateTime @updatedAt
|
||||||
|
|
||||||
@@ -100,4 +102,3 @@ model ScrapingSession {
|
|||||||
@@index([status])
|
@@index([status])
|
||||||
@@index([startedAt])
|
@@index([startedAt])
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -7,6 +7,9 @@ import {
|
|||||||
SearchGoodsRequest,
|
SearchGoodsRequest,
|
||||||
SearchGoodsResponse,
|
SearchGoodsResponse,
|
||||||
ProductItem,
|
ProductItem,
|
||||||
|
ObjectInfo,
|
||||||
|
ObjectReviewsResponse,
|
||||||
|
ProductDetailsResponse,
|
||||||
} from './types.js';
|
} from './types.js';
|
||||||
import { ProductService } from '../../../services/product/ProductService.js';
|
import { ProductService } from '../../../services/product/ProductService.js';
|
||||||
import { ProductParser } from '../../../services/parser/ProductParser.js';
|
import { ProductParser } from '../../../services/parser/ProductParser.js';
|
||||||
@@ -90,7 +93,7 @@ export class MagnitApiScraper {
|
|||||||
// Переход на главную страницу для получения cookies
|
// Переход на главную страницу для получения cookies
|
||||||
Logger.info('Переход на главную страницу magnit.ru...');
|
Logger.info('Переход на главную страницу magnit.ru...');
|
||||||
await this.page.goto('https://magnit.ru/', {
|
await this.page.goto('https://magnit.ru/', {
|
||||||
waitUntil: 'networkidle',
|
waitUntil: 'domcontentloaded',
|
||||||
timeout: 30000,
|
timeout: 30000,
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -188,6 +191,86 @@ export class MagnitApiScraper {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Fetches the `object_info` record (description, rating, review counters)
 * for a single product.
 *
 * Endpoint: /webgate/v1/listing/user-reviews-and-object-info
 *
 * The request runs through `withRetryAndReinit`, configured with this
 * scraper's retry options, the `autoReinitOn403` flag (default `true`) and
 * `reinitializeSession()` as the re-init callback.
 *
 * @param productId external product identifier placed into the query string
 * @returns the `object_info` member of the response body
 * @throws APIError when axios reports the failure (carries the HTTP status,
 *         or 0 when there was no response, plus the response body);
 *         any non-axios error is rethrown unchanged
 */
async fetchProductObjectInfo(productId: string): Promise<ObjectInfo> {
  const requestObjectInfo = async () => {
    try {
      Logger.debug(`Запрос object info для продукта ${productId}`);

      const url = ENDPOINTS.OBJECT_INFO(productId);
      const timeout = this.config.requestTimeout ?? 30000;
      const { data } = await this.httpClient.get<ObjectReviewsResponse>(url, { timeout });

      return data.object_info;
    } catch (failure) {
      // Anything that did not come from axios is not ours to translate.
      if (!axios.isAxiosError(failure)) {
        throw failure;
      }

      const httpStatus = failure.response?.status || 0;
      Logger.error(
        `Ошибка получения object info: ${httpStatus} - ${failure.message}`
      );
      throw new APIError(
        `Ошибка получения object info: ${failure.message}`,
        httpStatus,
        failure.response?.data
      );
    }
  };

  const retrySettings = {
    ...this.config.retryOptions,
    reinitOn403: this.config.autoReinitOn403 ?? true,
    onReinit: async () => {
      await this.reinitializeSession();
    },
  };

  return withRetryAndReinit(requestObjectInfo, retrySettings);
}
|
||||||
|
|
||||||
|
/**
 * Fetches the detail record of a product for the configured store
 * (the source of brand, description, weight and unit values).
 *
 * Endpoint: /webgate/v2/goods/{productId}/stores/{storeCode}
 *
 * The request runs through `withRetryAndReinit`, configured with this
 * scraper's retry options, the `autoReinitOn403` flag (default `true`) and
 * `reinitializeSession()` as the re-init callback.
 *
 * NOTE(review): only `storeCode` is passed to `ENDPOINTS.PRODUCT_DETAILS`,
 * so that helper's own defaults for store type / catalog type apply —
 * confirm this is intended when the scraper is configured differently.
 *
 * @param productId external product identifier
 * @returns the response body as received
 * @throws APIError when axios reports the failure (carries the HTTP status,
 *         or 0 when there was no response, plus the response body);
 *         any non-axios error is rethrown unchanged
 */
async fetchProductDetails(productId: string): Promise<ProductDetailsResponse> {
  const requestDetails = async () => {
    try {
      Logger.debug(`Запрос деталей для продукта ${productId}`);

      const url = ENDPOINTS.PRODUCT_DETAILS(productId, this.config.storeCode);
      const timeout = this.config.requestTimeout ?? 30000;
      const { data } = await this.httpClient.get<ProductDetailsResponse>(url, { timeout });

      return data;
    } catch (failure) {
      // Anything that did not come from axios is not ours to translate.
      if (!axios.isAxiosError(failure)) {
        throw failure;
      }

      const httpStatus = failure.response?.status || 0;
      Logger.error(
        `Ошибка получения деталей товара: ${httpStatus} - ${failure.message}`
      );
      throw new APIError(
        `Ошибка получения деталей товара: ${failure.message}`,
        httpStatus,
        failure.response?.data
      );
    }
  };

  const retrySettings = {
    ...this.config.retryOptions,
    reinitOn403: this.config.autoReinitOn403 ?? true,
    onReinit: async () => {
      await this.reinitializeSession();
    },
  };

  return withRetryAndReinit(requestDetails, retrySettings);
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Переинициализация сессии (при 403 или истечении cookies)
|
* Переинициализация сессии (при 403 или истечении cookies)
|
||||||
* ВАЖНО: Не закрываем браузер, только обновляем cookies
|
* ВАЖНО: Не закрываем браузер, только обновляем cookies
|
||||||
@@ -204,7 +287,7 @@ export class MagnitApiScraper {
|
|||||||
|
|
||||||
// Переход на главную страницу для обновления cookies
|
// Переход на главную страницу для обновления cookies
|
||||||
await this.page.goto('https://magnit.ru/', {
|
await this.page.goto('https://magnit.ru/', {
|
||||||
waitUntil: 'networkidle',
|
waitUntil: 'domcontentloaded',
|
||||||
timeout: 30000,
|
timeout: 30000,
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -5,5 +5,17 @@ export const ENDPOINTS = {
|
|||||||
SEARCH_GOODS: `${MAGNIT_API_BASE}/goods/search`,
|
SEARCH_GOODS: `${MAGNIT_API_BASE}/goods/search`,
|
||||||
PRODUCT_STORES: (productId: string, storeCode: string) =>
|
PRODUCT_STORES: (productId: string, storeCode: string) =>
|
||||||
`${MAGNIT_API_BASE}/goods/${productId}/stores/${storeCode}`,
|
`${MAGNIT_API_BASE}/goods/${productId}/stores/${storeCode}`,
|
||||||
|
|
||||||
|
// Product detail endpoints
|
||||||
|
OBJECT_INFO: (productId: string) =>
|
||||||
|
`${MAGNIT_BASE_URL}/webgate/v1/listing/user-reviews-and-object-info?service=dostavka&objectType=product&objectId=${productId}`,
|
||||||
|
|
||||||
|
PRODUCT_DETAILS: (
|
||||||
|
productId: string,
|
||||||
|
storeCode: string,
|
||||||
|
storeType = '2',
|
||||||
|
catalogType = '1'
|
||||||
|
) =>
|
||||||
|
`${MAGNIT_API_BASE}/goods/${productId}/stores/${storeCode}?storetype=${storeType}&catalogtype=${catalogType}`,
|
||||||
} as const;
|
} as const;
|
||||||
|
|
||||||
|
|||||||
@@ -78,3 +78,35 @@ export interface SearchGoodsResponse {
|
|||||||
fastCategoriesExtended?: FastCategory[];
|
fastCategoriesExtended?: FastCategory[];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// =====================================================
// Types for Product Detail Endpoints
// =====================================================

/**
 * `object_info` payload of /webgate/v1/listing/user-reviews-and-object-info.
 */
export interface ObjectInfo {
  id: string;
  title: string;
  description: string;
  // NOTE(review): unclear from the type alone whether this is the product
  // page URL or an image URL — confirm before storing it as an image.
  url: string;
  average_rating: number;
  count_ratings: number;
  count_reviews: number;
}

/**
 * Response body of /webgate/v1/listing/user-reviews-and-object-info.
 */
export interface ObjectReviewsResponse {
  // Shape of a review entry is not modelled; `unknown` forces consumers to
  // narrow before use instead of silently opting out of type checking.
  my_reviews: unknown[];
  object_info: ObjectInfo;
}

/**
 * One name/value entry of the product `details` list returned by
 * /webgate/v2/goods/{productId}/stores/{storeCode}. Entries may nest
 * further entries under `parameters`.
 */
export interface ProductDetailParameter {
  name: string;
  value: string;
  parameters?: ProductDetailParameter[];
}

/**
 * Response body of /webgate/v2/goods/{productId}/stores/{storeCode}.
 */
export interface ProductDetailsResponse {
  categories: number[];
  details: ProductDetailParameter[];
}
|
||||||
|
|
||||||
|
|||||||
97
src/scripts/analyze-category-nulls.ts
Normal file
97
src/scripts/analyze-category-nulls.ts
Normal file
@@ -0,0 +1,97 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
|
||||||
|
import { Logger } from '../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Prints category-coverage statistics for the products table: totals, the
 * share of products with and without a category, and a few sample rows of
 * each kind.
 *
 * On failure the error is logged and the process exit code is set to 1.
 * `process.exitCode` is used instead of `process.exit()` so the `finally`
 * block still runs and the database connection is released.
 */
async function main() {
  // Share of `part` in `whole` as a 2-decimal percentage; "0.00" for an
  // empty table instead of "NaN".
  const percent = (part: number, whole: number): string =>
    (whole > 0 ? (part / whole) * 100 : 0).toFixed(2);

  try {
    await connectDatabase();

    // Total products and how many have / lack a category.
    const totalProducts = await prisma.product.count();
    const nullCategoryCount = await prisma.product.count({
      where: { categoryId: null }
    });
    const withCategoryCount = await prisma.product.count({
      where: { categoryId: { not: null } }
    });

    Logger.info('\n📊 СТАТИСТИКА ПО КАТЕГОРИЯМ:');
    Logger.info(`Всего товаров: ${totalProducts}`);
    Logger.info(`Товаров без категории (null): ${nullCategoryCount} (${percent(nullCategoryCount, totalProducts)}%)`);
    Logger.info(`Товаров с категорией: ${withCategoryCount} (${percent(withCategoryCount, totalProducts)}%)`);

    // Total categories.
    const totalCategories = await prisma.category.count();
    Logger.info(`\nВсего категорий в БД: ${totalCategories}`);

    // Sample categories with their product counts.
    if (totalCategories > 0) {
      const sampleCategories = await prisma.category.findMany({
        take: 5,
        select: {
          id: true,
          externalId: true,
          name: true,
          _count: {
            select: { products: true }
          }
        }
      });

      Logger.info('\n📁 Примеры категорий:');
      for (const cat of sampleCategories) {
        Logger.info(` - [${cat.externalId}] ${cat.name} (товаров: ${cat._count.products})`);
      }
    }

    // Sample products without a category.
    const productsWithoutCategory = await prisma.product.findMany({
      where: { categoryId: null },
      take: 5,
      select: {
        id: true,
        externalId: true,
        name: true,
        currentPrice: true
      }
    });

    Logger.info('\n❌ Примеры товаров БЕЗ категории:');
    for (const p of productsWithoutCategory) {
      Logger.info(` - [${p.externalId}] ${p.name} (₽${p.currentPrice})`);
    }

    // Sample products with a category.
    const productsWithCategory = await prisma.product.findMany({
      where: { categoryId: { not: null } },
      take: 5,
      select: {
        id: true,
        externalId: true,
        name: true,
        currentPrice: true,
        category: {
          select: {
            externalId: true,
            name: true
          }
        }
      }
    });

    if (productsWithCategory.length > 0) {
      Logger.info('\n✅ Примеры товаров С категорией:');
      for (const p of productsWithCategory) {
        Logger.info(` - [${p.externalId}] ${p.name} → [${p.category?.externalId}] ${p.category?.name}`);
      }
    }
  } catch (error) {
    Logger.error('❌ Ошибка при анализе:', error);
    // Not process.exit(1): that would terminate before `finally` runs.
    process.exitCode = 1;
  } finally {
    await disconnectDatabase();
  }
}
|
||||||
|
|
||||||
|
main();
|
||||||
38
src/scripts/check-product-details.ts
Normal file
38
src/scripts/check-product-details.ts
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
|
||||||
|
import { Logger } from '../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Dumps one product row (the first one Prisma returns) with its
 * detail-related columns as pretty-printed JSON, for manual inspection.
 *
 * On failure the error is logged and the process exit code is set to 1.
 * `process.exitCode` is used instead of `process.exit()` so the `finally`
 * block still runs and the database connection is released.
 */
async function main() {
  try {
    await connectDatabase();

    // Fetch a sample product with the fields of interest.
    const product = await prisma.product.findFirst({
      select: {
        id: true,
        externalId: true,
        name: true,
        description: true,
        currentPrice: true,
        unit: true,
        weight: true,
        brand: true,
        categoryId: true,
        badges: true,
      }
    });

    if (product) {
      Logger.info('=== ДЕТАЛИ ТОВАРА ИЗ БД ===');
      Logger.info(JSON.stringify(product, null, 2));
    } else {
      // Make the empty-table case visible instead of exiting silently.
      Logger.warn('В БД нет товаров');
    }
  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Not process.exit(1): that would terminate before `finally` runs.
    process.exitCode = 1;
  } finally {
    await disconnectDatabase();
  }
}
|
||||||
|
|
||||||
|
main();
|
||||||
192
src/scripts/enrich-product-details.ts
Normal file
192
src/scripts/enrich-product-details.ts
Normal file
@@ -0,0 +1,192 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { prisma } from '../config/database.js';
|
||||||
|
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
|
||||||
|
import { ProductService } from '../services/product/ProductService.js';
|
||||||
|
import { ProductParser } from '../services/parser/ProductParser.js';
|
||||||
|
import { Logger } from '../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Counters collected during one enrichment run.
 */
interface EnrichmentStats {
  /** Products attempted. */
  total: number;
  /** Products whose details were written to the database. */
  success: number;
  /** Products whose processing threw. */
  errors: number;

  // Per-field coverage: how many processed products yielded each value.
  withBrand: number;
  withDescription: number;
  withWeight: number;
  withUnit: number;
  withRating: number;
  withScoresCount: number;
  withCommentsCount: number;
}
|
||||||
|
|
||||||
|
/**
 * Enriches stored products with detail data from the Magnit API.
 *
 * Products of the configured store that still need details are loaded in
 * batches of 100. For each product the PRODUCT_DETAILS endpoint is queried;
 * when `FETCH_OBJECT_INFO=true` the OBJECT_INFO endpoint is queried as well
 * (its failure is not fatal). The merged result, minus `categoryId`, is
 * written back through `ProductService.updateProductDetails`.
 *
 * A product whose processing throws is marked as fetched so it is not
 * requested again. If that marking fails too, the product would be returned
 * by every following batch; products already attempted in this run are
 * therefore skipped, and the run stops when a batch contains nothing new.
 *
 * Environment:
 *  - MAGNIT_STORE_CODE  store code (default '992301')
 *  - FETCH_OBJECT_INFO  'true' to also fetch ratings
 *
 * @throws rethrows any error raised outside the per-product handling
 */
async function main() {
  Logger.info('=== Обогащение товаров детальной информацией ===\n');

  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';

  // Flag: also query the second endpoint for ratings.
  const fetchObjectInfo = process.env.FETCH_OBJECT_INFO === 'true';
  if (fetchObjectInfo) {
    Logger.info('📊 Режим: с получением рейтингов (OBJECT_INFO endpoint)\n');
  } else {
    Logger.info('📦 Режим: только детали товаров (PRODUCT_DETAILS endpoint)\n');
    Logger.info(' Включи рейтинги: FETCH_OBJECT_INFO=true pnpm enrich\n');
  }

  const productService = new ProductService(prisma);
  const scraper = new MagnitApiScraper({
    storeCode,
    rateLimitDelay: 300,
  });

  const stats: EnrichmentStats = {
    total: 0,
    success: 0,
    errors: 0,
    withBrand: 0,
    withDescription: 0,
    withWeight: 0,
    withUnit: 0,
    withRating: 0,
    withScoresCount: 0,
    withCommentsCount: 0,
  };

  // Share of processed products as a 1-decimal percentage; "0.0" when
  // nothing was processed instead of "NaN".
  const percentOfTotal = (count: number): string =>
    (stats.total > 0 ? (count / stats.total) * 100 : 0).toFixed(1);

  // Products already attempted in this run, keyed by store and external id.
  const attempted = new Set<string>();
  const productKey = (p: { storeId: number | string; externalId: string }): string =>
    `${p.storeId}:${p.externalId}`;

  try {
    await scraper.initialize();
    Logger.info('✅ Скрапер инициализирован\n');

    const batchSize = 100;
    let processedCount = 0;

    for (;;) {
      Logger.info(`Получение порции товаров (до ${batchSize})...`);
      const products = await productService.getProductsNeedingDetails(storeCode, batchSize);

      if (products.length === 0) {
        Logger.info('Все товары обработаны');
        break;
      }

      // Anything seen before is still flagged as "needs details" only
      // because neither the update nor the fallback marking took effect.
      const pending = products.filter(p => !attempted.has(productKey(p)));
      if (pending.length === 0) {
        Logger.error(
          'Оставшиеся товары уже обрабатывались в этом запуске, но не были отмечены — остановка, чтобы избежать бесконечного цикла'
        );
        break;
      }
      if (pending.length < products.length) {
        Logger.warn(
          `Пропущено ${products.length - pending.length} товаров, уже обработанных в этом запуске`
        );
      }

      Logger.info(`Обработка ${pending.length} товаров...\n`);

      for (const product of pending) {
        attempted.add(productKey(product));
        stats.total++;
        processedCount++;

        try {
          Logger.info(`[${processedCount}] Обработка товара: ${product.externalId} - ${product.name}`);

          // Details from the PRODUCT_DETAILS endpoint.
          const detailsResponse = await scraper.fetchProductDetails(product.externalId);
          const parsedDetails = ProductParser.parseProductDetails(detailsResponse);

          // Ratings from the OBJECT_INFO endpoint, when enabled.
          let objectInfoData: {
            description?: string;
            rating?: number;
            scoresCount?: number;
            commentsCount?: number;
            imageUrl?: string;
          } = {};

          if (fetchObjectInfo) {
            try {
              const objectInfo = await scraper.fetchProductObjectInfo(product.externalId);
              objectInfoData = ProductParser.parseObjectInfo(objectInfo);
            } catch (err: unknown) {
              // OBJECT_INFO being unavailable is not fatal; carry on.
              const reason = err instanceof Error ? err.message : String(err);
              Logger.debug(` ⚠️ OBJECT_INFO недоступен: ${reason}`);
            }
          }

          // Merge; OBJECT_INFO wins for the description when it has one.
          const mergedDetails = {
            ...parsedDetails,
            ...objectInfoData,
            description: objectInfoData.description || parsedDetails.description,
          };

          if (mergedDetails.brand) stats.withBrand++;
          if (mergedDetails.description) stats.withDescription++;
          if (mergedDetails.weight) stats.withWeight++;
          if (mergedDetails.unit) stats.withUnit++;
          if (mergedDetails.rating !== undefined) stats.withRating++;
          if (mergedDetails.scoresCount !== undefined) stats.withScoresCount++;
          if (mergedDetails.commentsCount !== undefined) stats.withCommentsCount++;

          // categoryId is dropped from the update: the category may not
          // exist in the database.
          const { categoryId, ...detailsForUpdate } = mergedDetails;

          await productService.updateProductDetails(
            product.externalId,
            product.storeId,
            detailsForUpdate
          );

          stats.success++;

          Logger.info(
            ` ✅ Обновлено: ` +
            `${mergedDetails.brand ? 'brand ' : ''}` +
            `${mergedDetails.description ? 'description ' : ''}` +
            `${mergedDetails.weight ? 'weight ' : ''}` +
            `${mergedDetails.unit ? 'unit ' : ''}` +
            `${mergedDetails.rating !== undefined ? 'rating ' : ''}` +
            `${mergedDetails.scoresCount !== undefined ? 'scores ' : ''}` +
            `${mergedDetails.commentsCount !== undefined ? 'comments ' : ''}`
          );
        } catch (error: unknown) {
          stats.errors++;
          const reason = error instanceof Error ? error.message : String(error);

          // Mark the product as processed even on failure so its details
          // are not requested again.
          try {
            await productService.markAsDetailsFetched(product.externalId, product.storeId);
            Logger.warn(` ⚠️ Ошибка, товар пропущен: ${reason}`);
          } catch (markError) {
            Logger.error(` ❌ Ошибка отметки товара: ${markError}`);
          }
        }

        // Extra pause after every 10th product.
        if (processedCount % 10 === 0) {
          await new Promise(resolve => setTimeout(resolve, 1000));
        }
      }

      Logger.info(`\n--- Прогресс: обработано ${processedCount} товаров ---\n`);
    }

    Logger.info('\n=== СТАТИСТИКА ОБОГАЩЕНИЯ ===');
    Logger.info(`Всего обработано: ${stats.total}`);
    Logger.info(`Успешно: ${stats.success}`);
    Logger.info(`Ошибок: ${stats.errors}`);
    Logger.info(`\nПолучено полей:`);
    Logger.info(` Бренды: ${stats.withBrand} (${percentOfTotal(stats.withBrand)}%)`);
    Logger.info(` Описания: ${stats.withDescription} (${percentOfTotal(stats.withDescription)}%)`);
    Logger.info(` Вес: ${stats.withWeight} (${percentOfTotal(stats.withWeight)}%)`);
    Logger.info(` Единицы: ${stats.withUnit} (${percentOfTotal(stats.withUnit)}%)`);
    if (fetchObjectInfo) {
      Logger.info(` Рейтинги: ${stats.withRating} (${percentOfTotal(stats.withRating)}%)`);
      Logger.info(` Кол-во оценок: ${stats.withScoresCount} (${percentOfTotal(stats.withScoresCount)}%)`);
      Logger.info(` Кол-во отзывов: ${stats.withCommentsCount} (${percentOfTotal(stats.withCommentsCount)}%)`);
    }
  } catch (error) {
    Logger.error('Критическая ошибка:', error);
    throw error;
  } finally {
    try {
      await scraper.close();
    } finally {
      // Release the database connection even if closing the scraper fails.
      await prisma.$disconnect();
    }
    Logger.info('\n✅ Работа завершена');
  }
}
|
||||||
|
|
||||||
|
main();
|
||||||
67
src/scripts/inspect-api-response.ts
Normal file
67
src/scripts/inspect-api-response.ts
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
import 'dotenv/config';
|
||||||
|
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
|
||||||
|
import { Logger } from '../utils/logger.js';
|
||||||
|
|
||||||
|
/**
 * Requests the first five products from the search endpoint and logs the
 * raw structure of the first item plus the category information present on
 * each item, on the response itself, and in `fastCategoriesExtended`.
 *
 * Environment: MAGNIT_STORE_CODE (default '992301'), MAGNIT_STORE_TYPE
 * (default '6'), MAGNIT_CATALOG_TYPE (default '1'), MAGNIT_HEADLESS
 * (anything other than 'false' means headless).
 *
 * On failure the error is logged and the process exit code is set to 1.
 * `process.exitCode` is used instead of `process.exit()` so the `finally`
 * block still runs and the scraper is closed.
 */
async function main() {
  const storeCode = process.env.MAGNIT_STORE_CODE || '992301';

  const scraper = new MagnitApiScraper({
    storeCode,
    storeType: process.env.MAGNIT_STORE_TYPE || '6',
    catalogType: process.env.MAGNIT_CATALOG_TYPE || '1',
    headless: process.env.MAGNIT_HEADLESS !== 'false',
  });

  try {
    await scraper.initialize();

    Logger.info('Запрос первых 5 товаров для инспекции...\n');

    const response = await scraper.searchGoods({ limit: 5, offset: 0 }, []);

    Logger.info(`Получено товаров: ${response.items.length}\n`);

    if (response.items.length > 0) {
      Logger.info('=== СТРУКТУРА ПЕРВОГО ТОВАРА ===');
      const firstProduct = response.items[0];
      Logger.info(JSON.stringify(firstProduct, null, 2));

      Logger.info('\n=== ПРОВЕРКА НАЛИЧИЯ КАТЕГОРИЙ ===');
      response.items.forEach((item, index) => {
        Logger.info(
          `${index + 1}. [${item.id}] ${item.name.substring(0, 50)}...`
        );
        if (item.category) {
          Logger.info(` ✅ Категория: [${item.category.id}] ${item.category.title}`);
        } else {
          Logger.info(` ❌ Категория отсутствует (undefined)`);
        }
      });

      Logger.info('\n=== ОТВЕТ API (response.category) ===');
      if (response.category) {
        Logger.info(`Категория уровня ответа: [${response.category.id}] ${response.category.title}`);
      } else {
        Logger.info('Категория уровня ответа отсутствует');
      }

      Logger.info('\n=== БЫСТРЫЕ КАТЕГОРИИ (fastCategoriesExtended) ===');
      if (response.fastCategoriesExtended && response.fastCategoriesExtended.length > 0) {
        Logger.info(`Найдено ${response.fastCategoriesExtended.length} быстрых категорий:`);
        // Only the first ten are listed to keep the output short.
        for (const cat of response.fastCategoriesExtended.slice(0, 10)) {
          Logger.info(` - [${cat.id}] ${cat.title}`);
        }
      } else {
        Logger.info('Быстрые категории отсутствуют');
      }
    }
  } catch (error) {
    Logger.error('❌ Ошибка:', error);
    // Not process.exit(1): that would terminate before `finally` runs.
    process.exitCode = 1;
  } finally {
    await scraper.close();
  }
}
|
||||||
|
|
||||||
|
main();
|
||||||
@@ -1,4 +1,4 @@
|
|||||||
import { ProductItem } from '../../scrapers/api/magnit/types.js';
|
import { ProductItem, ProductDetailsResponse, ObjectInfo } from '../../scrapers/api/magnit/types.js';
|
||||||
import { CreateProductData } from '../product/ProductService.js';
|
import { CreateProductData } from '../product/ProductService.js';
|
||||||
|
|
||||||
export class ProductParser {
|
export class ProductParser {
|
||||||
@@ -80,5 +80,113 @@ export class ProductParser {
|
|||||||
return this.parseProductItem(item, storeId, categoryId);
|
return this.parseProductItem(item, storeId, categoryId);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Extracts brand, description, weight, unit and category id from a
 * ProductDetailsResponse.
 *
 * `categoryId` is the first element of `categories`, when there is one.
 * The other fields come from top-level `details` entries, matched by entry
 * name (case-insensitively, surrounding whitespace ignored):
 *  - brand:       name is "бренд" or "brand"; the placeholder value
 *                 "Различные бренды" is ignored
 *  - description: name contains "описание"
 *  - weight:      name contains "вес" or is "weight"
 *  - unit:        name contains "единица" or is "unit"
 * The rules are tried in that order and an entry is consumed by the first
 * one its name matches. Entries with an empty value are skipped; when
 * several entries match the same field the last one wins. Nested
 * `parameters` are not inspected.
 *
 * A response without a `details` array, or entries without a string
 * `name`, are tolerated and simply contribute nothing.
 *
 * @param detailsResponse body returned by the product details endpoint
 * @returns only the fields that were found
 */
static parseProductDetails(detailsResponse: ProductDetailsResponse): {
  brand?: string;
  description?: string;
  weight?: string;
  unit?: string;
  categoryId?: number;
} {
  const result: {
    brand?: string;
    description?: string;
    weight?: string;
    unit?: string;
    categoryId?: number;
  } = {};

  // First category of the list, if any.
  if (detailsResponse.categories && detailsResponse.categories.length > 0) {
    result.categoryId = detailsResponse.categories[0];
  }

  // The API payload is not validated upstream, so do not assume `details`
  // is present or that every entry is well-formed.
  for (const detail of detailsResponse.details ?? []) {
    if (typeof detail?.name !== 'string' || !detail.value) {
      continue;
    }

    const nameLower = detail.name.trim().toLowerCase();

    if (nameLower === 'бренд' || nameLower === 'brand') {
      // "Различные бренды" ("various brands") is a placeholder, not a brand.
      if (!detail.value.includes('Различные бренды')) {
        result.brand = detail.value;
      }
    } else if (nameLower.includes('описание')) {
      result.description = detail.value;
    } else if (nameLower.includes('вес') || nameLower === 'weight') {
      result.weight = detail.value;
    } else if (nameLower.includes('единица') || nameLower === 'unit') {
      result.unit = detail.value;
    }
  }

  return result;
}
|
||||||
|
|
||||||
|
/**
 * Maps an ObjectInfo record onto the product fields it can fill.
 *
 *  - description   <- description (when non-empty)
 *  - rating        <- average_rating
 *  - scoresCount   <- count_ratings
 *  - commentsCount <- count_reviews
 *  - imageUrl      <- url (when non-empty)
 *
 * Numeric fields are copied only when they are neither `undefined` nor
 * `null`: a JSON payload expresses "no value" as `null`, which must not be
 * reported as a rating or a count.
 *
 * NOTE(review): `url` is stored as `imageUrl`; confirm the endpoint really
 * returns an image URL there and not the product page URL.
 *
 * @param objectInfo `object_info` part of the reviews endpoint response
 * @returns only the fields that were present
 */
static parseObjectInfo(objectInfo: ObjectInfo): {
  description?: string;
  rating?: number;
  scoresCount?: number;
  commentsCount?: number;
  imageUrl?: string;
} {
  const result: {
    description?: string;
    rating?: number;
    scoresCount?: number;
    commentsCount?: number;
    imageUrl?: string;
  } = {};

  if (objectInfo.description) {
    result.description = objectInfo.description;
  }

  // `!= null` rejects both undefined and null while keeping a legitimate 0.
  if (objectInfo.average_rating != null) {
    result.rating = objectInfo.average_rating;
  }

  if (objectInfo.count_ratings != null) {
    result.scoresCount = objectInfo.count_ratings;
  }

  if (objectInfo.count_reviews != null) {
    result.commentsCount = objectInfo.count_reviews;
  }

  if (objectInfo.url) {
    result.imageUrl = objectInfo.url;
  }

  return result;
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ export interface CreateProductData {
|
|||||||
commentsCount?: number;
|
commentsCount?: number;
|
||||||
quantity?: number;
|
quantity?: number;
|
||||||
badges?: string;
|
badges?: string;
|
||||||
|
isDetailsFetched?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
export class ProductService {
|
export class ProductService {
|
||||||
@@ -45,9 +46,9 @@ export class ProductService {
|
|||||||
if (existing) {
|
if (existing) {
|
||||||
// Обновляем существующий товар
|
// Обновляем существующий товар
|
||||||
Logger.debug(`Обновление товара: ${data.externalId}`);
|
Logger.debug(`Обновление товара: ${data.externalId}`);
|
||||||
return await this.prisma.product.update({
|
|
||||||
where: { id: existing.id },
|
// Если isDetailsFetched не передан явно, сохраняем текущее значение
|
||||||
data: {
|
const updateData: any = {
|
||||||
name: data.name,
|
name: data.name,
|
||||||
description: data.description,
|
description: data.description,
|
||||||
url: data.url,
|
url: data.url,
|
||||||
@@ -65,7 +66,16 @@ export class ProductService {
|
|||||||
quantity: data.quantity,
|
quantity: data.quantity,
|
||||||
badges: data.badges,
|
badges: data.badges,
|
||||||
categoryId: data.categoryId,
|
categoryId: data.categoryId,
|
||||||
},
|
};
|
||||||
|
|
||||||
|
// Обновляем isDetailsFetched только если передано явно
|
||||||
|
if (data.isDetailsFetched !== undefined) {
|
||||||
|
updateData.isDetailsFetched = data.isDetailsFetched;
|
||||||
|
}
|
||||||
|
|
||||||
|
return await this.prisma.product.update({
|
||||||
|
where: { id: existing.id },
|
||||||
|
data: updateData,
|
||||||
});
|
});
|
||||||
} else {
|
} else {
|
||||||
// Создаем новый товар
|
// Создаем новый товар
|
||||||
@@ -204,5 +214,103 @@ export class ProductService {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the products of a store whose details have not been fetched yet.
 *
 * The store is resolved by its code first; its products with
 * `isDetailsFetched: false` are then returned ordered by ascending `id`.
 *
 * @param storeCode - Code of the store whose products are requested.
 * @param limit - Optional cap on the number of products; passed to the query as `take`.
 * @returns The matching products.
 * @throws DatabaseError When no store has the given code, or when a query fails.
 */
async getProductsNeedingDetails(storeCode: string, limit?: number): Promise<Product[]> {
  try {
    // Resolve the store by its code first.
    const store = await this.prisma.store.findFirst({
      where: { code: storeCode },
    });

    if (!store) {
      throw new DatabaseError(`Магазин с кодом ${storeCode} не найден`);
    }

    return await this.prisma.product.findMany({
      where: {
        storeId: store.id,
        isDetailsFetched: false,
      },
      take: limit,
      orderBy: {
        id: 'asc',
      },
    });
  } catch (error) {
    // The "store not found" error raised above is already a DatabaseError.
    // Rethrow it untouched so it is not reported as a query failure and
    // wrapped into a second DatabaseError with a doubled message.
    if (error instanceof DatabaseError) {
      throw error;
    }

    Logger.error('Ошибка получения товаров для обогащения:', error);
    throw new DatabaseError(
      `Не удалось получить товары: ${error instanceof Error ? error.message : String(error)}`,
      error instanceof Error ? error : undefined
    );
  }
}
|
||||||
|
|
||||||
|
/**
 * Writes fetched details (brand, description, weight, unit, rating, ...) to a
 * product and flags it with `isDetailsFetched: true`.
 *
 * The product is selected through the `externalId_storeId` key built from
 * the two identifiers. Every field of `details` is spread into the update
 * payload as-is.
 *
 * @param externalId - External identifier of the product.
 * @param storeId - Identifier of the store the product belongs to.
 * @param details - Detail fields to write to the product.
 * @returns The updated product.
 * @throws DatabaseError When the update fails.
 */
async updateProductDetails(
  externalId: string,
  storeId: number,
  details: {
    brand?: string;
    description?: string;
    weight?: string;
    unit?: string;
    categoryId?: number;
    rating?: number;
    scoresCount?: number;
    commentsCount?: number;
    imageUrl?: string;
  }
): Promise<Product> {
  try {
    const productKey = { externalId_storeId: { externalId, storeId } };
    const payload = { ...details, isDetailsFetched: true };

    // Await inside the try so a rejected update reaches the catch below.
    const updated = await this.prisma.product.update({
      where: productKey,
      data: payload,
    });
    return updated;
  } catch (error) {
    Logger.error('Ошибка обновления деталей товара:', error);

    const cause = error instanceof Error ? error : undefined;
    const reason = cause !== undefined ? cause.message : String(error);
    throw new DatabaseError(`Не удалось обновить детали товара: ${reason}`, cause);
  }
}
|
||||||
|
|
||||||
|
/**
 * Flags a product as processed by setting `isDetailsFetched: true`, without
 * touching any other field (used even when no details were obtained).
 *
 * The product is selected through the `externalId_storeId` key built from
 * the two identifiers.
 *
 * @param externalId - External identifier of the product.
 * @param storeId - Identifier of the store the product belongs to.
 * @returns The updated product.
 * @throws DatabaseError When the update fails.
 */
async markAsDetailsFetched(externalId: string, storeId: number): Promise<Product> {
  try {
    const productKey = { externalId_storeId: { externalId, storeId } };

    // Await inside the try so a rejected update reaches the catch below.
    const updated = await this.prisma.product.update({
      where: productKey,
      data: { isDetailsFetched: true },
    });
    return updated;
  } catch (error) {
    Logger.error('Ошибка отметки товара как обработанного:', error);

    const cause = error instanceof Error ? error : undefined;
    const reason = cause !== undefined ? cause.message : String(error);
    throw new DatabaseError(`Не удалось отметить товар: ${reason}`, cause);
  }
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -17,7 +17,11 @@
|
|||||||
"noEmit": false
|
"noEmit": false
|
||||||
},
|
},
|
||||||
"include": ["src/**/*"],
|
"include": ["src/**/*"],
|
||||||
"exclude": ["node_modules", "dist", "generated"],
|
"exclude": [
|
||||||
|
"node_modules",
|
||||||
|
"dist",
|
||||||
|
"generated"
|
||||||
|
],
|
||||||
"ts-node": {
|
"ts-node": {
|
||||||
"esm": true
|
"esm": true
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user