Compare commits
1 Commits
912946bb00
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| b6f5138390 |
29
AGENTS.md
29
AGENTS.md
@@ -4,7 +4,7 @@ Guidelines for AI coding agents working on this repository.
|
||||
|
||||
## Project Overview
|
||||
|
||||
TypeScript-based scraper for Russian supermarkets (Magnit). Uses Playwright for sessions, Axios for API, PostgreSQL with Prisma ORM.
|
||||
TypeScript-based scraper for Russian supermarkets (Magnit). Uses Playwright for sessions, Axios for API, PostgreSQL with Drizzle ORM.
|
||||
|
||||
## Build & Run Commands
|
||||
|
||||
@@ -20,12 +20,13 @@ pnpm enrich # Run product enrichment
|
||||
pnpm test-db # Test database connection
|
||||
```
|
||||
|
||||
### Prisma Commands
|
||||
### Drizzle Commands
|
||||
|
||||
```bash
|
||||
pnpm prisma:generate # Generate client after schema changes
|
||||
pnpm prisma:migrate # Create and apply migrations
|
||||
pnpm prisma:studio # Open database GUI
|
||||
pnpm db:generate # Generate migration files
|
||||
pnpm db:migrate # Apply migrations
|
||||
pnpm db:push # Push schema changes directly (dev only)
|
||||
pnpm db:studio # Open database GUI
|
||||
```
|
||||
|
||||
### Running Scripts Directly
|
||||
@@ -45,13 +46,15 @@ No test framework configured. Manual testing via `pnpm test-db`, `pnpm dev`, Pri
|
||||
|
||||
1. External packages first, then internal modules
|
||||
2. **Always include `.js` extension** for local imports (ESM)
|
||||
3. Use named imports from Prisma client
|
||||
3. Use named imports from Drizzle schema
|
||||
|
||||
```typescript
|
||||
import { chromium, Browser } from 'playwright';
|
||||
import axios from 'axios';
|
||||
import { Logger } from '../../../utils/logger.js';
|
||||
import { PrismaClient } from '../../../../generated/prisma/client.js';
|
||||
import { db } from '../../../config/database.js';
|
||||
import { products, stores, categories } from '../../../db/schema.js';
|
||||
import { eq, and, asc } from 'drizzle-orm';
|
||||
```
|
||||
|
||||
### Naming Conventions
|
||||
@@ -113,15 +116,19 @@ Logger.debug('Debug'); // Only when DEBUG=true
|
||||
|
||||
### Services Pattern
|
||||
|
||||
- Services receive `PrismaClient` via constructor (DI)
|
||||
- Services receive `db` (Drizzle instance) via constructor (DI)
|
||||
- Use `getOrCreate` for idempotent operations
|
||||
- Never call Prisma directly from scrapers
|
||||
- Never call Drizzle directly from scrapers
|
||||
|
||||
### Database Patterns
|
||||
|
||||
- Upsert via composite unique `(externalId, storeId)`
|
||||
- Upsert via composite unique constraint on `(externalId, storeId)`
|
||||
- Batch processing: 50 items per batch
|
||||
- Prices: Float (rubles), converted from kopecks
|
||||
- Prices: Decimal (rubles), stored as decimal type
|
||||
- Use `.select().from().where()` for queries
|
||||
- Use `.insert().values()` for inserts
|
||||
- Use `.update().set().where()` for updates
|
||||
- Use `.delete().where()` for deletes
|
||||
|
||||
### Comments
|
||||
|
||||
|
||||
11
drizzle.config.ts
Normal file
11
drizzle.config.ts
Normal file
@@ -0,0 +1,11 @@
|
||||
import 'dotenv/config';
|
||||
import { defineConfig } from 'drizzle-kit';
|
||||
|
||||
export default defineConfig({
|
||||
schema: './src/db/schema.ts',
|
||||
out: './drizzle',
|
||||
dialect: 'postgresql',
|
||||
dbCredentials: {
|
||||
url: process.env.DATABASE_URL!,
|
||||
},
|
||||
});
|
||||
71
drizzle/0000_common_mole_man.sql
Normal file
71
drizzle/0000_common_mole_man.sql
Normal file
@@ -0,0 +1,71 @@
|
||||
CREATE TABLE "categories" (
|
||||
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "categories_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
|
||||
"externalId" integer,
|
||||
"name" varchar(255) NOT NULL,
|
||||
"parentId" integer,
|
||||
"description" text,
|
||||
"createdAt" timestamp DEFAULT now() NOT NULL,
|
||||
"updatedAt" timestamp DEFAULT now()
|
||||
);
|
||||
--> statement-breakpoint
|
||||
CREATE TABLE "products" (
|
||||
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "products_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
|
||||
"externalId" varchar(50) NOT NULL,
|
||||
"storeId" integer NOT NULL,
|
||||
"categoryId" integer,
|
||||
"name" varchar(500) NOT NULL,
|
||||
"description" text,
|
||||
"url" text,
|
||||
"imageUrl" text,
|
||||
"currentPrice" numeric(10, 2) NOT NULL,
|
||||
"unit" varchar(50),
|
||||
"weight" varchar(100),
|
||||
"brand" varchar(255),
|
||||
"oldPrice" numeric(10, 2),
|
||||
"discountPercent" numeric(5, 2),
|
||||
"promotionEndDate" timestamp,
|
||||
"rating" numeric(3, 2),
|
||||
"scoresCount" integer,
|
||||
"commentsCount" integer,
|
||||
"quantity" integer,
|
||||
"badges" text,
|
||||
"isDetailsFetched" boolean DEFAULT false NOT NULL,
|
||||
"createdAt" timestamp DEFAULT now() NOT NULL,
|
||||
"updatedAt" timestamp DEFAULT now(),
|
||||
CONSTRAINT "products_externalId_storeId_unique" UNIQUE("externalId","storeId")
|
||||
);
|
||||
--> statement-breakpoint
|
||||
CREATE TABLE "scraping_sessions" (
|
||||
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "scraping_sessions_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
|
||||
"storeId" integer NOT NULL,
|
||||
"sourceType" varchar(50) NOT NULL,
|
||||
"status" varchar(50) NOT NULL,
|
||||
"startedAt" timestamp DEFAULT now() NOT NULL,
|
||||
"finishedAt" timestamp,
|
||||
"error" text
|
||||
);
|
||||
--> statement-breakpoint
|
||||
CREATE TABLE "stores" (
|
||||
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "stores_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
|
||||
"name" varchar(255) NOT NULL,
|
||||
"type" varchar(50) NOT NULL,
|
||||
"code" varchar(50),
|
||||
"url" text,
|
||||
"region" varchar(255),
|
||||
"address" text,
|
||||
"createdAt" timestamp DEFAULT now() NOT NULL,
|
||||
"updatedAt" timestamp DEFAULT now()
|
||||
);
|
||||
--> statement-breakpoint
|
||||
ALTER TABLE "products" ADD CONSTRAINT "products_storeId_stores_id_fk" FOREIGN KEY ("storeId") REFERENCES "public"."stores"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
|
||||
ALTER TABLE "products" ADD CONSTRAINT "products_categoryId_categories_id_fk" FOREIGN KEY ("categoryId") REFERENCES "public"."categories"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
|
||||
ALTER TABLE "scraping_sessions" ADD CONSTRAINT "scraping_sessions_storeId_stores_id_fk" FOREIGN KEY ("storeId") REFERENCES "public"."stores"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
|
||||
CREATE INDEX "categories_externalId_idx" ON "categories" USING btree ("externalId");--> statement-breakpoint
|
||||
CREATE INDEX "categories_parentId_idx" ON "categories" USING btree ("parentId");--> statement-breakpoint
|
||||
CREATE INDEX "products_storeId_idx" ON "products" USING btree ("storeId");--> statement-breakpoint
|
||||
CREATE INDEX "products_categoryId_idx" ON "products" USING btree ("categoryId");--> statement-breakpoint
|
||||
CREATE INDEX "products_externalId_idx" ON "products" USING btree ("externalId");--> statement-breakpoint
|
||||
CREATE INDEX "scraping_sessions_storeId_idx" ON "scraping_sessions" USING btree ("storeId");--> statement-breakpoint
|
||||
CREATE INDEX "scraping_sessions_status_idx" ON "scraping_sessions" USING btree ("status");--> statement-breakpoint
|
||||
CREATE INDEX "scraping_sessions_startedAt_idx" ON "scraping_sessions" USING btree ("startedAt");--> statement-breakpoint
|
||||
CREATE INDEX "stores_code_idx" ON "stores" USING btree ("code");
|
||||
588
drizzle/meta/0000_snapshot.json
Normal file
588
drizzle/meta/0000_snapshot.json
Normal file
@@ -0,0 +1,588 @@
|
||||
{
|
||||
"id": "0c32aa7e-a09f-4c32-a8bd-35af715f5fde",
|
||||
"prevId": "00000000-0000-0000-0000-000000000000",
|
||||
"version": "7",
|
||||
"dialect": "postgresql",
|
||||
"tables": {
|
||||
"public.categories": {
|
||||
"name": "categories",
|
||||
"schema": "",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"identity": {
|
||||
"type": "always",
|
||||
"name": "categories_id_seq",
|
||||
"schema": "public",
|
||||
"increment": "1",
|
||||
"startWith": "1",
|
||||
"minValue": "1",
|
||||
"maxValue": "2147483647",
|
||||
"cache": "1",
|
||||
"cycle": false
|
||||
}
|
||||
},
|
||||
"externalId": {
|
||||
"name": "externalId",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "varchar(255)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"parentId": {
|
||||
"name": "parentId",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"createdAt": {
|
||||
"name": "createdAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"default": "now()"
|
||||
},
|
||||
"updatedAt": {
|
||||
"name": "updatedAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"default": "now()"
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"categories_externalId_idx": {
|
||||
"name": "categories_externalId_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "externalId",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
},
|
||||
"categories_parentId_idx": {
|
||||
"name": "categories_parentId_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "parentId",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"policies": {},
|
||||
"checkConstraints": {},
|
||||
"isRLSEnabled": false
|
||||
},
|
||||
"public.products": {
|
||||
"name": "products",
|
||||
"schema": "",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"identity": {
|
||||
"type": "always",
|
||||
"name": "products_id_seq",
|
||||
"schema": "public",
|
||||
"increment": "1",
|
||||
"startWith": "1",
|
||||
"minValue": "1",
|
||||
"maxValue": "2147483647",
|
||||
"cache": "1",
|
||||
"cycle": false
|
||||
}
|
||||
},
|
||||
"externalId": {
|
||||
"name": "externalId",
|
||||
"type": "varchar(50)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"storeId": {
|
||||
"name": "storeId",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"categoryId": {
|
||||
"name": "categoryId",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "varchar(500)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"description": {
|
||||
"name": "description",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"imageUrl": {
|
||||
"name": "imageUrl",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"currentPrice": {
|
||||
"name": "currentPrice",
|
||||
"type": "numeric(10, 2)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"unit": {
|
||||
"name": "unit",
|
||||
"type": "varchar(50)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"weight": {
|
||||
"name": "weight",
|
||||
"type": "varchar(100)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"brand": {
|
||||
"name": "brand",
|
||||
"type": "varchar(255)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"oldPrice": {
|
||||
"name": "oldPrice",
|
||||
"type": "numeric(10, 2)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"discountPercent": {
|
||||
"name": "discountPercent",
|
||||
"type": "numeric(5, 2)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"promotionEndDate": {
|
||||
"name": "promotionEndDate",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"rating": {
|
||||
"name": "rating",
|
||||
"type": "numeric(3, 2)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"scoresCount": {
|
||||
"name": "scoresCount",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"commentsCount": {
|
||||
"name": "commentsCount",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"quantity": {
|
||||
"name": "quantity",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"badges": {
|
||||
"name": "badges",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"isDetailsFetched": {
|
||||
"name": "isDetailsFetched",
|
||||
"type": "boolean",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"default": false
|
||||
},
|
||||
"createdAt": {
|
||||
"name": "createdAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"default": "now()"
|
||||
},
|
||||
"updatedAt": {
|
||||
"name": "updatedAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"default": "now()"
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"products_storeId_idx": {
|
||||
"name": "products_storeId_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "storeId",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
},
|
||||
"products_categoryId_idx": {
|
||||
"name": "products_categoryId_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "categoryId",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
},
|
||||
"products_externalId_idx": {
|
||||
"name": "products_externalId_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "externalId",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"products_storeId_stores_id_fk": {
|
||||
"name": "products_storeId_stores_id_fk",
|
||||
"tableFrom": "products",
|
||||
"tableTo": "stores",
|
||||
"columnsFrom": [
|
||||
"storeId"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "no action",
|
||||
"onUpdate": "no action"
|
||||
},
|
||||
"products_categoryId_categories_id_fk": {
|
||||
"name": "products_categoryId_categories_id_fk",
|
||||
"tableFrom": "products",
|
||||
"tableTo": "categories",
|
||||
"columnsFrom": [
|
||||
"categoryId"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "no action",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {
|
||||
"products_externalId_storeId_unique": {
|
||||
"name": "products_externalId_storeId_unique",
|
||||
"nullsNotDistinct": false,
|
||||
"columns": [
|
||||
"externalId",
|
||||
"storeId"
|
||||
]
|
||||
}
|
||||
},
|
||||
"policies": {},
|
||||
"checkConstraints": {},
|
||||
"isRLSEnabled": false
|
||||
},
|
||||
"public.scraping_sessions": {
|
||||
"name": "scraping_sessions",
|
||||
"schema": "",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"identity": {
|
||||
"type": "always",
|
||||
"name": "scraping_sessions_id_seq",
|
||||
"schema": "public",
|
||||
"increment": "1",
|
||||
"startWith": "1",
|
||||
"minValue": "1",
|
||||
"maxValue": "2147483647",
|
||||
"cache": "1",
|
||||
"cycle": false
|
||||
}
|
||||
},
|
||||
"storeId": {
|
||||
"name": "storeId",
|
||||
"type": "integer",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"sourceType": {
|
||||
"name": "sourceType",
|
||||
"type": "varchar(50)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"status": {
|
||||
"name": "status",
|
||||
"type": "varchar(50)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"startedAt": {
|
||||
"name": "startedAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"default": "now()"
|
||||
},
|
||||
"finishedAt": {
|
||||
"name": "finishedAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"error": {
|
||||
"name": "error",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"scraping_sessions_storeId_idx": {
|
||||
"name": "scraping_sessions_storeId_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "storeId",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
},
|
||||
"scraping_sessions_status_idx": {
|
||||
"name": "scraping_sessions_status_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "status",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
},
|
||||
"scraping_sessions_startedAt_idx": {
|
||||
"name": "scraping_sessions_startedAt_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "startedAt",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
}
|
||||
},
|
||||
"foreignKeys": {
|
||||
"scraping_sessions_storeId_stores_id_fk": {
|
||||
"name": "scraping_sessions_storeId_stores_id_fk",
|
||||
"tableFrom": "scraping_sessions",
|
||||
"tableTo": "stores",
|
||||
"columnsFrom": [
|
||||
"storeId"
|
||||
],
|
||||
"columnsTo": [
|
||||
"id"
|
||||
],
|
||||
"onDelete": "no action",
|
||||
"onUpdate": "no action"
|
||||
}
|
||||
},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"policies": {},
|
||||
"checkConstraints": {},
|
||||
"isRLSEnabled": false
|
||||
},
|
||||
"public.stores": {
|
||||
"name": "stores",
|
||||
"schema": "",
|
||||
"columns": {
|
||||
"id": {
|
||||
"name": "id",
|
||||
"type": "integer",
|
||||
"primaryKey": true,
|
||||
"notNull": true,
|
||||
"identity": {
|
||||
"type": "always",
|
||||
"name": "stores_id_seq",
|
||||
"schema": "public",
|
||||
"increment": "1",
|
||||
"startWith": "1",
|
||||
"minValue": "1",
|
||||
"maxValue": "2147483647",
|
||||
"cache": "1",
|
||||
"cycle": false
|
||||
}
|
||||
},
|
||||
"name": {
|
||||
"name": "name",
|
||||
"type": "varchar(255)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"type": {
|
||||
"name": "type",
|
||||
"type": "varchar(50)",
|
||||
"primaryKey": false,
|
||||
"notNull": true
|
||||
},
|
||||
"code": {
|
||||
"name": "code",
|
||||
"type": "varchar(50)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"url": {
|
||||
"name": "url",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"region": {
|
||||
"name": "region",
|
||||
"type": "varchar(255)",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"address": {
|
||||
"name": "address",
|
||||
"type": "text",
|
||||
"primaryKey": false,
|
||||
"notNull": false
|
||||
},
|
||||
"createdAt": {
|
||||
"name": "createdAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": true,
|
||||
"default": "now()"
|
||||
},
|
||||
"updatedAt": {
|
||||
"name": "updatedAt",
|
||||
"type": "timestamp",
|
||||
"primaryKey": false,
|
||||
"notNull": false,
|
||||
"default": "now()"
|
||||
}
|
||||
},
|
||||
"indexes": {
|
||||
"stores_code_idx": {
|
||||
"name": "stores_code_idx",
|
||||
"columns": [
|
||||
{
|
||||
"expression": "code",
|
||||
"isExpression": false,
|
||||
"asc": true,
|
||||
"nulls": "last"
|
||||
}
|
||||
],
|
||||
"isUnique": false,
|
||||
"concurrently": false,
|
||||
"method": "btree",
|
||||
"with": {}
|
||||
}
|
||||
},
|
||||
"foreignKeys": {},
|
||||
"compositePrimaryKeys": {},
|
||||
"uniqueConstraints": {},
|
||||
"policies": {},
|
||||
"checkConstraints": {},
|
||||
"isRLSEnabled": false
|
||||
}
|
||||
},
|
||||
"enums": {},
|
||||
"schemas": {},
|
||||
"sequences": {},
|
||||
"roles": {},
|
||||
"policies": {},
|
||||
"views": {},
|
||||
"_meta": {
|
||||
"columns": {},
|
||||
"schemas": {},
|
||||
"tables": {}
|
||||
}
|
||||
}
|
||||
13
drizzle/meta/_journal.json
Normal file
13
drizzle/meta/_journal.json
Normal file
@@ -0,0 +1,13 @@
|
||||
{
|
||||
"version": "7",
|
||||
"dialect": "postgresql",
|
||||
"entries": [
|
||||
{
|
||||
"idx": 0,
|
||||
"version": "7",
|
||||
"when": 1769461513369,
|
||||
"tag": "0000_common_mole_man",
|
||||
"breakpoints": true
|
||||
}
|
||||
]
|
||||
}
|
||||
13
package.json
13
package.json
@@ -10,10 +10,10 @@
|
||||
"dev": "tsx src/scripts/scrape-magnit-products.ts",
|
||||
"enrich": "tsx src/scripts/enrich-product-details.ts",
|
||||
"test-db": "tsx src/scripts/test-db-connection.ts",
|
||||
"prisma:generate": "prisma generate",
|
||||
"prisma:migrate": "prisma migrate dev",
|
||||
"prisma:studio": "prisma studio --config=prisma.config.ts",
|
||||
"prisma:format": "prisma format"
|
||||
"db:generate": "drizzle-kit generate",
|
||||
"db:migrate": "drizzle-kit migrate",
|
||||
"db:push": "drizzle-kit push",
|
||||
"db:studio": "drizzle-kit studio"
|
||||
},
|
||||
"keywords": [
|
||||
"scraper",
|
||||
@@ -23,17 +23,16 @@
|
||||
"author": "",
|
||||
"license": "ISC",
|
||||
"dependencies": {
|
||||
"@prisma/adapter-pg": "^7.2.0",
|
||||
"@prisma/client": "^7.2.0",
|
||||
"axios": "^1.13.2",
|
||||
"dotenv": "^17.2.3",
|
||||
"drizzle-orm": "^0.45.1",
|
||||
"pg": "^8.16.3",
|
||||
"playwright": "^1.57.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
"@types/node": "^25.0.3",
|
||||
"@types/pg": "^8.16.0",
|
||||
"prisma": "^7.2.0",
|
||||
"drizzle-kit": "^0.31.8",
|
||||
"ts-node": "^10.9.2",
|
||||
"tsx": "^4.21.0",
|
||||
"typescript": "^5.9.3"
|
||||
|
||||
1386
pnpm-lock.yaml
generated
1386
pnpm-lock.yaml
generated
File diff suppressed because it is too large
Load Diff
@@ -1,12 +0,0 @@
|
||||
import 'dotenv/config'
|
||||
import { defineConfig } from 'prisma/config'
|
||||
|
||||
export default defineConfig({
|
||||
schema: 'src/database/prisma/schema.prisma',
|
||||
migrations: {
|
||||
path: 'src/database/prisma/migrations',
|
||||
},
|
||||
datasource: {
|
||||
url: process.env.DATABASE_URL,
|
||||
},
|
||||
})
|
||||
@@ -1,15 +1,12 @@
|
||||
import "dotenv/config";
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '../../generated/prisma/client.js';
|
||||
import 'dotenv/config';
|
||||
import { db } from '../database/client.js';
|
||||
import { stores } from '../db/schema.js';
|
||||
|
||||
const connectionString = `${process.env.DATABASE_URL}`;
|
||||
|
||||
const adapter = new PrismaPg({ connectionString });
|
||||
export const prisma = new PrismaClient({ adapter });
|
||||
export { db };
|
||||
|
||||
export async function connectDatabase() {
|
||||
try {
|
||||
await prisma.$connect();
|
||||
await db.select().from(stores).limit(1);
|
||||
console.log('✅ Подключение к базе данных установлено');
|
||||
} catch (error) {
|
||||
console.error('❌ Ошибка подключения к базе данных:', error);
|
||||
@@ -18,7 +15,6 @@ export async function connectDatabase() {
|
||||
}
|
||||
|
||||
export async function disconnectDatabase() {
|
||||
await prisma.$disconnect();
|
||||
await db.$client.end();
|
||||
console.log('✅ Отключение от базы данных');
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,12 @@
|
||||
import "dotenv/config";
|
||||
import { PrismaPg } from '@prisma/adapter-pg';
|
||||
import { PrismaClient } from '../../generated/prisma/client.js';
|
||||
import 'dotenv/config';
|
||||
import { drizzle } from 'drizzle-orm/node-postgres';
|
||||
import { Pool } from 'pg';
|
||||
import * as schema from '../db/schema.js';
|
||||
|
||||
const connectionString = `${process.env.DATABASE_URL}`;
|
||||
const connectionString = process.env.DATABASE_URL;
|
||||
|
||||
const adapter = new PrismaPg({ connectionString });
|
||||
export const prisma = new PrismaClient({ adapter });
|
||||
const pool = new Pool({ connectionString });
|
||||
|
||||
export default prisma;
|
||||
export const db = drizzle(pool, { schema });
|
||||
|
||||
export default db;
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
// This is your Prisma schema file,
|
||||
// learn more about it in the docs: https://pris.ly/d/prisma-schema
|
||||
|
||||
generator client {
|
||||
provider = "prisma-client"
|
||||
output = "../../../generated/prisma"
|
||||
}
|
||||
|
||||
datasource db {
|
||||
provider = "postgresql"
|
||||
}
|
||||
|
||||
model Store {
|
||||
id Int @id @default(autoincrement())
|
||||
name String
|
||||
type String // "web" | "app"
|
||||
code String? // storeCode для API (например "992301")
|
||||
url String?
|
||||
region String?
|
||||
address String?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
|
||||
products Product[]
|
||||
sessions ScrapingSession[]
|
||||
|
||||
@@index([code])
|
||||
}
|
||||
|
||||
model Category {
|
||||
id Int @id @default(autoincrement())
|
||||
externalId Int? // ID из внешнего API
|
||||
name String
|
||||
parentId Int?
|
||||
parent Category? @relation("CategoryHierarchy", fields: [parentId], references: [id])
|
||||
children Category[] @relation("CategoryHierarchy")
|
||||
description String?
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
|
||||
products Product[]
|
||||
|
||||
@@index([externalId])
|
||||
@@index([parentId])
|
||||
}
|
||||
|
||||
model Product {
|
||||
id Int @id @default(autoincrement())
|
||||
externalId String // ID из API (например "1000300796")
|
||||
storeId Int
|
||||
categoryId Int?
|
||||
name String
|
||||
description String?
|
||||
url String?
|
||||
imageUrl String?
|
||||
currentPrice Decimal @db.Decimal(10, 2)
|
||||
unit String? // единица измерения
|
||||
weight String? // вес/объем
|
||||
brand String?
|
||||
|
||||
// Промо-информация
|
||||
oldPrice Decimal? @db.Decimal(10, 2) // старая цена при акции
|
||||
discountPercent Decimal? @db.Decimal(5, 2) // процент скидки
|
||||
promotionEndDate DateTime? // дата окончания акции
|
||||
|
||||
// Рейтинги
|
||||
rating Decimal? @db.Decimal(3, 2) // рейтинг товара
|
||||
scoresCount Int? // количество оценок
|
||||
commentsCount Int? // количество комментариев
|
||||
|
||||
// Остаток и бейджи
|
||||
quantity Int? // остаток на складе
|
||||
badges String? // массив бейджей в формате JSON
|
||||
|
||||
// Детальная информация
|
||||
isDetailsFetched Boolean @default(false) // были ли получены детали через detail endpoint
|
||||
|
||||
createdAt DateTime @default(now())
|
||||
updatedAt DateTime @updatedAt
|
||||
|
||||
store Store @relation(fields: [storeId], references: [id])
|
||||
category Category? @relation(fields: [categoryId], references: [id])
|
||||
|
||||
@@unique([externalId, storeId])
|
||||
@@index([storeId])
|
||||
@@index([categoryId])
|
||||
@@index([externalId])
|
||||
}
|
||||
|
||||
model ScrapingSession {
|
||||
id Int @id @default(autoincrement())
|
||||
storeId Int
|
||||
sourceType String // "api" | "web" | "app"
|
||||
status String // "pending" | "running" | "completed" | "failed"
|
||||
startedAt DateTime @default(now())
|
||||
finishedAt DateTime?
|
||||
error String?
|
||||
|
||||
store Store @relation(fields: [storeId], references: [id])
|
||||
|
||||
@@index([storeId])
|
||||
@@index([status])
|
||||
@@index([startedAt])
|
||||
}
|
||||
73
src/db/schema.ts
Normal file
73
src/db/schema.ts
Normal file
@@ -0,0 +1,73 @@
|
||||
import { pgTable, integer, varchar, text, decimal, timestamp, index, unique, boolean } from 'drizzle-orm/pg-core';
|
||||
|
||||
export const stores = pgTable('stores', {
|
||||
id: integer().primaryKey().generatedAlwaysAsIdentity(),
|
||||
name: varchar({ length: 255 }).notNull(),
|
||||
type: varchar({ length: 50 }).notNull(),
|
||||
code: varchar({ length: 50 }),
|
||||
url: text(),
|
||||
region: varchar({ length: 255 }),
|
||||
address: text(),
|
||||
createdAt: timestamp().defaultNow().notNull(),
|
||||
updatedAt: timestamp().defaultNow(),
|
||||
}, (table) => [
|
||||
index('stores_code_idx').on(table.code),
|
||||
]);
|
||||
|
||||
export const categories = pgTable('categories', {
|
||||
id: integer().primaryKey().generatedAlwaysAsIdentity(),
|
||||
externalId: integer(),
|
||||
name: varchar({ length: 255 }).notNull(),
|
||||
parentId: integer(),
|
||||
description: text(),
|
||||
createdAt: timestamp().defaultNow().notNull(),
|
||||
updatedAt: timestamp().defaultNow(),
|
||||
}, (table) => [
|
||||
index('categories_externalId_idx').on(table.externalId),
|
||||
index('categories_parentId_idx').on(table.parentId),
|
||||
]);
|
||||
|
||||
export const products = pgTable('products', {
|
||||
id: integer().primaryKey().generatedAlwaysAsIdentity(),
|
||||
externalId: varchar({ length: 50 }).notNull(),
|
||||
storeId: integer().notNull().references(() => stores.id),
|
||||
categoryId: integer().references(() => categories.id),
|
||||
name: varchar({ length: 500 }).notNull(),
|
||||
description: text(),
|
||||
url: text(),
|
||||
imageUrl: text(),
|
||||
currentPrice: decimal({ precision: 10, scale: 2 }).notNull(),
|
||||
unit: varchar({ length: 50 }),
|
||||
weight: varchar({ length: 100 }),
|
||||
brand: varchar({ length: 255 }),
|
||||
oldPrice: decimal({ precision: 10, scale: 2 }),
|
||||
discountPercent: decimal({ precision: 5, scale: 2 }),
|
||||
promotionEndDate: timestamp(),
|
||||
rating: decimal({ precision: 3, scale: 2 }),
|
||||
scoresCount: integer(),
|
||||
commentsCount: integer(),
|
||||
quantity: integer(),
|
||||
badges: text(),
|
||||
isDetailsFetched: boolean().default(false).notNull(),
|
||||
createdAt: timestamp().defaultNow().notNull(),
|
||||
updatedAt: timestamp().defaultNow(),
|
||||
}, (table) => [
|
||||
unique('products_externalId_storeId_unique').on(table.externalId, table.storeId),
|
||||
index('products_storeId_idx').on(table.storeId),
|
||||
index('products_categoryId_idx').on(table.categoryId),
|
||||
index('products_externalId_idx').on(table.externalId),
|
||||
]);
|
||||
|
||||
export const scrapingSessions = pgTable('scraping_sessions', {
|
||||
id: integer().primaryKey().generatedAlwaysAsIdentity(),
|
||||
storeId: integer().notNull().references(() => stores.id),
|
||||
sourceType: varchar({ length: 50 }).notNull(),
|
||||
status: varchar({ length: 50 }).notNull(),
|
||||
startedAt: timestamp().defaultNow().notNull(),
|
||||
finishedAt: timestamp(),
|
||||
error: text(),
|
||||
}, (table) => [
|
||||
index('scraping_sessions_storeId_idx').on(table.storeId),
|
||||
index('scraping_sessions_status_idx').on(table.status),
|
||||
index('scraping_sessions_startedAt_idx').on(table.startedAt),
|
||||
]);
|
||||
@@ -13,7 +13,6 @@ import {
|
||||
} from './types.js';
|
||||
import { ProductService } from '../../../services/product/ProductService.js';
|
||||
import { ProductParser } from '../../../services/parser/ProductParser.js';
|
||||
import { PrismaClient } from '../../../../generated/prisma/client.js';
|
||||
import { withRetryAndReinit } from '../../../utils/retry.js';
|
||||
|
||||
export interface MagnitScraperConfig {
|
||||
@@ -510,12 +509,12 @@ export class MagnitApiScraper {
|
||||
*/
|
||||
async saveToDatabase(
|
||||
products: ProductItem[],
|
||||
prisma: PrismaClient
|
||||
drizzleDb: any
|
||||
): Promise<number> {
|
||||
try {
|
||||
Logger.info(`Начало сохранения ${products.length} товаров в БД...`);
|
||||
|
||||
const productService = new ProductService(prisma);
|
||||
const productService = new ProductService(drizzleDb);
|
||||
|
||||
// Получаем или создаем магазин
|
||||
const store = await productService.getOrCreateStore(
|
||||
@@ -558,13 +557,13 @@ export class MagnitApiScraper {
|
||||
* Более эффективно для больших каталогов
|
||||
*/
|
||||
async saveToDatabaseStreaming(
|
||||
prisma: PrismaClient,
|
||||
drizzleDb: any,
|
||||
options: {
|
||||
batchSize?: number; // default: 50
|
||||
maxProducts?: number;
|
||||
} = {}
|
||||
): Promise<number> {
|
||||
const productService = new ProductService(prisma);
|
||||
const productService = new ProductService(drizzleDb);
|
||||
let totalSaved = 0;
|
||||
|
||||
// Получаем магазин один раз в начале
|
||||
|
||||
@@ -1,93 +1,46 @@
|
||||
import 'dotenv/config';
|
||||
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
|
||||
import { connectDatabase, disconnectDatabase, db } from '../config/database.js';
|
||||
import { products, categories } from '../db/schema.js';
|
||||
import { isNull, and, not } from 'drizzle-orm';
|
||||
import { Logger } from '../utils/logger.js';
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
await connectDatabase();
|
||||
|
||||
// Check total products and null categoryId count
|
||||
const totalProducts = await prisma.product.count();
|
||||
const nullCategoryCount = await prisma.product.count({
|
||||
where: { categoryId: null }
|
||||
});
|
||||
const withCategoryCount = await prisma.product.count({
|
||||
where: { categoryId: { not: null } }
|
||||
});
|
||||
const allProducts = await db.select().from(products);
|
||||
const totalProducts = allProducts.length;
|
||||
const nullCategoryCount = allProducts.filter(p => p.categoryId === null).length;
|
||||
const withCategoryCount = totalProducts - nullCategoryCount;
|
||||
|
||||
Logger.info('\n📊 СТАТИСТИКА ПО КАТЕГОРИЯМ:');
|
||||
Logger.info(`Всего товаров: ${totalProducts}`);
|
||||
Logger.info(`Товаров без категории (null): ${nullCategoryCount} (${((nullCategoryCount / totalProducts) * 100).toFixed(2)}%)`);
|
||||
Logger.info(`Товаров с категорией: ${withCategoryCount} (${((withCategoryCount / totalProducts) * 100).toFixed(2)}%)`);
|
||||
|
||||
// Check total categories
|
||||
const totalCategories = await prisma.category.count();
|
||||
Logger.info(`\nВсего категорий в БД: ${totalCategories}`);
|
||||
const allCategories = await db.select().from(categories);
|
||||
Logger.info(`\nВсего категорий в БД: ${allCategories.length}`);
|
||||
|
||||
// Sample categories
|
||||
if (totalCategories > 0) {
|
||||
const sampleCategories = await prisma.category.findMany({
|
||||
take: 5,
|
||||
select: {
|
||||
id: true,
|
||||
externalId: true,
|
||||
name: true,
|
||||
_count: {
|
||||
select: { products: true }
|
||||
}
|
||||
}
|
||||
});
|
||||
if (allCategories.length > 0) {
|
||||
const sampleCategories = allCategories.slice(0, 5);
|
||||
|
||||
Logger.info('\n📁 Примеры категорий:');
|
||||
sampleCategories.forEach(cat => {
|
||||
Logger.info(` - [${cat.externalId}] ${cat.name} (товаров: ${cat._count.products})`);
|
||||
sampleCategories.forEach((cat: any) => {
|
||||
const productsCount = allProducts.filter(p => p.categoryId === cat.id).length;
|
||||
Logger.info(` - [${cat.externalId}] ${cat.name} (товаров: ${productsCount})`);
|
||||
});
|
||||
}
|
||||
|
||||
// Sample products without categories
|
||||
const productsWithoutCategory = await prisma.product.findMany({
|
||||
where: { categoryId: null },
|
||||
take: 5,
|
||||
select: {
|
||||
id: true,
|
||||
externalId: true,
|
||||
name: true,
|
||||
currentPrice: true
|
||||
}
|
||||
});
|
||||
|
||||
Logger.info('\n❌ Примеры товаров БЕЗ категории:');
|
||||
productsWithoutCategory.forEach(p => {
|
||||
Logger.info(` - [${p.externalId}] ${p.name} (₽${p.currentPrice})`);
|
||||
});
|
||||
|
||||
// Sample products with categories
|
||||
const productsWithCategory = await prisma.product.findMany({
|
||||
where: { categoryId: { not: null } },
|
||||
take: 5,
|
||||
select: {
|
||||
id: true,
|
||||
externalId: true,
|
||||
name: true,
|
||||
currentPrice: true,
|
||||
category: {
|
||||
select: {
|
||||
externalId: true,
|
||||
name: true
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
if (productsWithCategory.length > 0) {
|
||||
Logger.info('\n✅ Примеры товаров С категорией:');
|
||||
productsWithCategory.forEach(p => {
|
||||
Logger.info(` - [${p.externalId}] ${p.name} → [${p.category?.externalId}] ${p.category?.name}`);
|
||||
const productsWithoutCategory = allProducts.filter(p => p.categoryId === null).slice(0, 5);
|
||||
if (productsWithoutCategory.length > 0) {
|
||||
Logger.info('\n📦 Примеры товаров без категории:');
|
||||
productsWithoutCategory.forEach((p: any) => {
|
||||
Logger.info(` - [${p.externalId}] ${p.name}`);
|
||||
});
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
Logger.error('❌ Ошибка при анализе:', error);
|
||||
Logger.error('❌ Ошибка:', error);
|
||||
process.exit(1);
|
||||
} finally {
|
||||
await disconnectDatabase();
|
||||
|
||||
@@ -1,30 +1,17 @@
|
||||
import 'dotenv/config';
|
||||
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
|
||||
import { connectDatabase, disconnectDatabase, db } from '../config/database.js';
|
||||
import { products } from '../db/schema.js';
|
||||
import { Logger } from '../utils/logger.js';
|
||||
|
||||
async function main() {
|
||||
try {
|
||||
await connectDatabase();
|
||||
|
||||
// Get a sample product with all fields
|
||||
const product = await prisma.product.findFirst({
|
||||
select: {
|
||||
id: true,
|
||||
externalId: true,
|
||||
name: true,
|
||||
description: true,
|
||||
currentPrice: true,
|
||||
unit: true,
|
||||
weight: true,
|
||||
brand: true,
|
||||
categoryId: true,
|
||||
badges: true,
|
||||
}
|
||||
});
|
||||
const result = await db.select().from(products).limit(1);
|
||||
|
||||
if (product) {
|
||||
if (result.length > 0) {
|
||||
Logger.info('=== ДЕТАЛИ ТОВАРА ИЗ БД ===');
|
||||
Logger.info(JSON.stringify(product, null, 2));
|
||||
Logger.info(JSON.stringify(result[0], null, 2));
|
||||
}
|
||||
|
||||
} catch (error) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
import 'dotenv/config';
|
||||
import { prisma } from '../config/database.js';
|
||||
import { db } from '../config/database.js';
|
||||
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
|
||||
import { ProductService } from '../services/product/ProductService.js';
|
||||
import { ProductParser } from '../services/parser/ProductParser.js';
|
||||
@@ -33,7 +33,7 @@ async function main() {
|
||||
}
|
||||
|
||||
// Инициализация
|
||||
const productService = new ProductService(prisma);
|
||||
const productService = new ProductService(db);
|
||||
const scraper = new MagnitApiScraper({
|
||||
storeCode,
|
||||
rateLimitDelay: 300,
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import 'dotenv/config';
|
||||
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
|
||||
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
|
||||
import { connectDatabase, disconnectDatabase, db } from '../config/database.js';
|
||||
import { Logger } from '../utils/logger.js';
|
||||
|
||||
async function main() {
|
||||
@@ -64,7 +64,7 @@ async function main() {
|
||||
// STREAMING режим (рекомендуется для больших каталогов)
|
||||
Logger.info('📡 Использование потокового режима скрапинга');
|
||||
|
||||
saved = await scraper.saveToDatabaseStreaming(prisma, {
|
||||
saved = await scraper.saveToDatabaseStreaming(db, {
|
||||
batchSize: 50,
|
||||
maxProducts,
|
||||
});
|
||||
@@ -77,7 +77,7 @@ async function main() {
|
||||
Logger.info(`📦 Получено товаров: ${products.length}`);
|
||||
|
||||
if (products.length > 0) {
|
||||
saved = await scraper.saveToDatabase(products, prisma);
|
||||
saved = await scraper.saveToDatabase(products, db);
|
||||
} else {
|
||||
Logger.warn('⚠️ Товары не найдены');
|
||||
}
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import { PrismaClient, Product, Store, Category } from '../../../generated/prisma/client.js';
|
||||
import { db } from '../../database/client.js';
|
||||
import { products, stores, categories } from '../../db/schema.js';
|
||||
import { eq, and, asc } from 'drizzle-orm';
|
||||
import { Logger } from '../../utils/logger.js';
|
||||
import { DatabaseError } from '../../utils/errors.js';
|
||||
|
||||
@@ -25,64 +27,124 @@ export interface CreateProductData {
|
||||
isDetailsFetched?: boolean;
|
||||
}
|
||||
|
||||
export class ProductService {
|
||||
constructor(private prisma: PrismaClient) {}
|
||||
export interface Product {
|
||||
id: number;
|
||||
externalId: string;
|
||||
storeId: number;
|
||||
categoryId: number | null;
|
||||
name: string;
|
||||
description: string | null;
|
||||
url: string | null;
|
||||
imageUrl: string | null;
|
||||
currentPrice: string;
|
||||
unit: string | null;
|
||||
weight: string | null;
|
||||
brand: string | null;
|
||||
oldPrice: string | null;
|
||||
discountPercent: string | null;
|
||||
promotionEndDate: Date | null;
|
||||
rating: string | null;
|
||||
scoresCount: number | null;
|
||||
commentsCount: number | null;
|
||||
quantity: number | null;
|
||||
badges: string | null;
|
||||
isDetailsFetched: boolean;
|
||||
createdAt: Date;
|
||||
updatedAt: Date | null;
|
||||
}
|
||||
|
||||
export interface Store {
|
||||
id: number;
|
||||
name: string;
|
||||
type: string;
|
||||
code: string | null;
|
||||
url: string | null;
|
||||
region: string | null;
|
||||
address: string | null;
|
||||
createdAt: Date;
|
||||
updatedAt: Date | null;
|
||||
}
|
||||
|
||||
export interface Category {
|
||||
id: number;
|
||||
externalId: number | null;
|
||||
name: string;
|
||||
parentId: number | null;
|
||||
description: string | null;
|
||||
createdAt: Date;
|
||||
updatedAt: Date | null;
|
||||
}
|
||||
|
||||
export class ProductService {
|
||||
constructor(private drizzleDb: typeof db) {}
|
||||
|
||||
private get db() {
|
||||
return this.drizzleDb;
|
||||
}
|
||||
|
||||
/**
|
||||
* Сохранение одного товара
|
||||
*/
|
||||
async saveProduct(data: CreateProductData): Promise<Product> {
|
||||
try {
|
||||
// Проверяем, существует ли товар
|
||||
const existing = await this.prisma.product.findUnique({
|
||||
where: {
|
||||
externalId_storeId: {
|
||||
externalId: data.externalId,
|
||||
storeId: data.storeId,
|
||||
},
|
||||
},
|
||||
});
|
||||
const existing = await this.db
|
||||
.select()
|
||||
.from(products)
|
||||
.where(
|
||||
and(
|
||||
eq(products.externalId, data.externalId),
|
||||
eq(products.storeId, data.storeId)
|
||||
)
|
||||
)
|
||||
.limit(1);
|
||||
|
||||
if (existing) {
|
||||
// Обновляем существующий товар
|
||||
if (existing.length > 0) {
|
||||
Logger.debug(`Обновление товара: ${data.externalId}`);
|
||||
|
||||
// Если isDetailsFetched не передан явно, сохраняем текущее значение
|
||||
const updateData: any = {
|
||||
name: data.name,
|
||||
description: data.description,
|
||||
url: data.url,
|
||||
imageUrl: data.imageUrl,
|
||||
currentPrice: data.currentPrice,
|
||||
currentPrice: data.currentPrice.toString(),
|
||||
unit: data.unit,
|
||||
weight: data.weight,
|
||||
brand: data.brand,
|
||||
oldPrice: data.oldPrice,
|
||||
discountPercent: data.discountPercent,
|
||||
oldPrice: data.oldPrice?.toString(),
|
||||
discountPercent: data.discountPercent?.toString(),
|
||||
promotionEndDate: data.promotionEndDate,
|
||||
rating: data.rating,
|
||||
rating: data.rating?.toString(),
|
||||
scoresCount: data.scoresCount,
|
||||
commentsCount: data.commentsCount,
|
||||
quantity: data.quantity,
|
||||
badges: data.badges,
|
||||
categoryId: data.categoryId,
|
||||
updatedAt: new Date(),
|
||||
};
|
||||
|
||||
// Обновляем isDetailsFetched только если передано явно
|
||||
if (data.isDetailsFetched !== undefined) {
|
||||
updateData.isDetailsFetched = data.isDetailsFetched;
|
||||
}
|
||||
|
||||
return await this.prisma.product.update({
|
||||
where: { id: existing.id },
|
||||
data: updateData,
|
||||
});
|
||||
const result = await this.db
|
||||
.update(products)
|
||||
.set(updateData)
|
||||
.where(eq(products.id, existing[0].id))
|
||||
.returning();
|
||||
|
||||
return result[0] as Product;
|
||||
} else {
|
||||
// Создаем новый товар
|
||||
Logger.debug(`Создание нового товара: ${data.externalId}`);
|
||||
return await this.prisma.product.create({
|
||||
data,
|
||||
});
|
||||
const result = await this.db
|
||||
.insert(products)
|
||||
.values({
|
||||
...data,
|
||||
currentPrice: data.currentPrice.toString(),
|
||||
oldPrice: data.oldPrice?.toString(),
|
||||
discountPercent: data.discountPercent?.toString(),
|
||||
rating: data.rating?.toString(),
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.returning();
|
||||
|
||||
return result[0] as Product;
|
||||
}
|
||||
} catch (error) {
|
||||
Logger.error('Ошибка сохранения товара:', error);
|
||||
@@ -93,22 +155,18 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Сохранение нескольких товаров батчами
|
||||
*/
|
||||
async saveProducts(products: CreateProductData[]): Promise<number> {
|
||||
async saveProducts(productsData: CreateProductData[]): Promise<number> {
|
||||
try {
|
||||
Logger.info(`Сохранение ${products.length} товаров...`);
|
||||
Logger.info(`Сохранение ${productsData.length} товаров...`);
|
||||
let saved = 0;
|
||||
|
||||
// Обрабатываем батчами по 50 товаров
|
||||
const batchSize = 50;
|
||||
for (let i = 0; i < products.length; i += batchSize) {
|
||||
const batch = products.slice(i, i + batchSize);
|
||||
for (let i = 0; i < productsData.length; i += batchSize) {
|
||||
const batch = productsData.slice(i, i + batchSize);
|
||||
const promises = batch.map(product => this.saveProduct(product));
|
||||
await Promise.all(promises);
|
||||
saved += batch.length;
|
||||
Logger.info(`Сохранено товаров: ${saved}/${products.length}`);
|
||||
Logger.info(`Сохранено товаров: ${saved}/${productsData.length}`);
|
||||
}
|
||||
|
||||
Logger.info(`✅ Всего сохранено товаров: ${saved}`);
|
||||
@@ -119,22 +177,23 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Поиск товара по externalId и storeId
|
||||
*/
|
||||
async findByExternalId(
|
||||
externalId: string,
|
||||
storeId: number
|
||||
): Promise<Product | null> {
|
||||
try {
|
||||
return await this.prisma.product.findUnique({
|
||||
where: {
|
||||
externalId_storeId: {
|
||||
externalId,
|
||||
storeId,
|
||||
},
|
||||
},
|
||||
});
|
||||
const result = await this.db
|
||||
.select()
|
||||
.from(products)
|
||||
.where(
|
||||
and(
|
||||
eq(products.externalId, externalId),
|
||||
eq(products.storeId, storeId)
|
||||
)
|
||||
)
|
||||
.limit(1);
|
||||
|
||||
return result.length > 0 ? (result[0] as Product) : null;
|
||||
} catch (error) {
|
||||
Logger.error('Ошибка поиска товара:', error);
|
||||
throw new DatabaseError(
|
||||
@@ -144,30 +203,33 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Получение или создание магазина
|
||||
*/
|
||||
async getOrCreateStore(
|
||||
code: string,
|
||||
name: string = 'Магнит'
|
||||
): Promise<Store> {
|
||||
try {
|
||||
let store = await this.prisma.store.findFirst({
|
||||
where: { code },
|
||||
});
|
||||
const existing = await this.db
|
||||
.select()
|
||||
.from(stores)
|
||||
.where(eq(stores.code, code))
|
||||
.limit(1);
|
||||
|
||||
if (!store) {
|
||||
Logger.info(`Создание нового магазина: ${code}`);
|
||||
store = await this.prisma.store.create({
|
||||
data: {
|
||||
name,
|
||||
type: 'web',
|
||||
code,
|
||||
},
|
||||
});
|
||||
if (existing.length > 0) {
|
||||
return existing[0] as Store;
|
||||
}
|
||||
|
||||
return store;
|
||||
Logger.info(`Создание нового магазина: ${code}`);
|
||||
const result = await this.db
|
||||
.insert(stores)
|
||||
.values({
|
||||
name,
|
||||
type: 'web',
|
||||
code,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.returning();
|
||||
|
||||
return result[0] as Store;
|
||||
} catch (error) {
|
||||
Logger.error('Ошибка получения/создания магазина:', error);
|
||||
throw new DatabaseError(
|
||||
@@ -177,35 +239,41 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Получение или создание категории
|
||||
*/
|
||||
async getOrCreateCategory(
|
||||
externalId: number,
|
||||
name: string
|
||||
): Promise<Category> {
|
||||
try {
|
||||
let category = await this.prisma.category.findFirst({
|
||||
where: { externalId },
|
||||
});
|
||||
const existing = await this.db
|
||||
.select()
|
||||
.from(categories)
|
||||
.where(eq(categories.externalId, externalId))
|
||||
.limit(1);
|
||||
|
||||
if (!category) {
|
||||
Logger.info(`Создание новой категории: ${name} (${externalId})`);
|
||||
category = await this.prisma.category.create({
|
||||
data: {
|
||||
externalId,
|
||||
name,
|
||||
},
|
||||
});
|
||||
} else if (category.name !== name) {
|
||||
// Обновляем название категории, если изменилось
|
||||
category = await this.prisma.category.update({
|
||||
where: { id: category.id },
|
||||
data: { name },
|
||||
});
|
||||
if (existing.length > 0) {
|
||||
const category = existing[0] as Category;
|
||||
if (category.name !== name) {
|
||||
const result = await this.db
|
||||
.update(categories)
|
||||
.set({ name, updatedAt: new Date() })
|
||||
.where(eq(categories.id, category.id))
|
||||
.returning();
|
||||
return result[0] as Category;
|
||||
}
|
||||
return category;
|
||||
}
|
||||
|
||||
return category;
|
||||
Logger.info(`Создание новой категории: ${name} (${externalId})`);
|
||||
const result = await this.db
|
||||
.insert(categories)
|
||||
.values({
|
||||
externalId,
|
||||
name,
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.returning();
|
||||
|
||||
return result[0] as Category;
|
||||
} catch (error) {
|
||||
Logger.error('Ошибка получения/создания категории:', error);
|
||||
throw new DatabaseError(
|
||||
@@ -215,30 +283,33 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Получение товаров, для которых не были получены детали
|
||||
*/
|
||||
async getProductsNeedingDetails(storeCode: string, limit?: number): Promise<Product[]> {
|
||||
try {
|
||||
// Сначала находим store по code
|
||||
const store = await this.prisma.store.findFirst({
|
||||
where: { code: storeCode },
|
||||
});
|
||||
const storeResult = await this.db
|
||||
.select()
|
||||
.from(stores)
|
||||
.where(eq(stores.code, storeCode))
|
||||
.limit(1);
|
||||
|
||||
if (!store) {
|
||||
if (storeResult.length === 0) {
|
||||
throw new DatabaseError(`Магазин с кодом ${storeCode} не найден`);
|
||||
}
|
||||
|
||||
return await this.prisma.product.findMany({
|
||||
where: {
|
||||
storeId: store.id,
|
||||
isDetailsFetched: false,
|
||||
},
|
||||
take: limit,
|
||||
orderBy: {
|
||||
id: 'asc',
|
||||
},
|
||||
});
|
||||
const store = storeResult[0];
|
||||
|
||||
const result = await this.db
|
||||
.select()
|
||||
.from(products)
|
||||
.where(
|
||||
and(
|
||||
eq(products.storeId, store.id),
|
||||
eq(products.isDetailsFetched, false)
|
||||
)
|
||||
)
|
||||
.orderBy(asc(products.id))
|
||||
.limit(limit || 1000);
|
||||
|
||||
return result as Product[];
|
||||
} catch (error) {
|
||||
Logger.error('Ошибка получения товаров для обогащения:', error);
|
||||
throw new DatabaseError(
|
||||
@@ -248,9 +319,6 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Обновление деталей товара (бренд, описание, вес, единица измерения, рейтинг)
|
||||
*/
|
||||
async updateProductDetails(
|
||||
externalId: string,
|
||||
storeId: number,
|
||||
@@ -267,18 +335,23 @@ export class ProductService {
|
||||
}
|
||||
): Promise<Product> {
|
||||
try {
|
||||
return await this.prisma.product.update({
|
||||
where: {
|
||||
externalId_storeId: {
|
||||
externalId,
|
||||
storeId,
|
||||
},
|
||||
},
|
||||
data: {
|
||||
const result = await this.db
|
||||
.update(products)
|
||||
.set({
|
||||
...details,
|
||||
rating: details.rating?.toString(),
|
||||
isDetailsFetched: true,
|
||||
},
|
||||
});
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(
|
||||
and(
|
||||
eq(products.externalId, externalId),
|
||||
eq(products.storeId, storeId)
|
||||
)
|
||||
)
|
||||
.returning();
|
||||
|
||||
return result[0] as Product;
|
||||
} catch (error) {
|
||||
Logger.error('Ошибка обновления деталей товара:', error);
|
||||
throw new DatabaseError(
|
||||
@@ -288,22 +361,23 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Отметить товар как обработанный (даже если детали не были получены)
|
||||
*/
|
||||
async markAsDetailsFetched(externalId: string, storeId: number): Promise<Product> {
|
||||
try {
|
||||
return await this.prisma.product.update({
|
||||
where: {
|
||||
externalId_storeId: {
|
||||
externalId,
|
||||
storeId,
|
||||
},
|
||||
},
|
||||
data: {
|
||||
const result = await this.db
|
||||
.update(products)
|
||||
.set({
|
||||
isDetailsFetched: true,
|
||||
},
|
||||
});
|
||||
updatedAt: new Date(),
|
||||
})
|
||||
.where(
|
||||
and(
|
||||
eq(products.externalId, externalId),
|
||||
eq(products.storeId, storeId)
|
||||
)
|
||||
)
|
||||
.returning();
|
||||
|
||||
return result[0] as Product;
|
||||
} catch (error) {
|
||||
Logger.error('Ошибка отметки товара как обработанного:', error);
|
||||
throw new DatabaseError(
|
||||
@@ -313,4 +387,3 @@ export class ProductService {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user