Migrate from Prisma to Drizzle ORM

Co-Authored-By: Oz <oz-agent@warp.dev>
This commit is contained in:
2026-04-05 01:18:08 +05:00
parent 912946bb00
commit b6f5138390
18 changed files with 1699 additions and 1100 deletions

View File

@@ -4,7 +4,7 @@ Guidelines for AI coding agents working on this repository.
## Project Overview
TypeScript-based scraper for Russian supermarkets (Magnit). Uses Playwright for sessions, Axios for API, PostgreSQL with Prisma ORM.
TypeScript-based scraper for Russian supermarkets (Magnit). Uses Playwright for sessions, Axios for API, PostgreSQL with Drizzle ORM.
## Build & Run Commands
@@ -20,12 +20,13 @@ pnpm enrich # Run product enrichment
pnpm test-db # Test database connection
```
### Prisma Commands
### Drizzle Commands
```bash
pnpm prisma:generate # Generate client after schema changes
pnpm prisma:migrate # Create and apply migrations
pnpm prisma:studio # Open database GUI
pnpm db:generate # Generate migration files
pnpm db:migrate # Apply migrations
pnpm db:push # Push schema changes directly (dev only)
pnpm db:studio # Open database GUI
```
### Running Scripts Directly
@@ -45,13 +46,15 @@ No test framework configured. Manual testing via `pnpm test-db`, `pnpm dev`, Pri
1. External packages first, then internal modules
2. **Always include `.js` extension** for local imports (ESM)
3. Use named imports from Prisma client
3. Use named imports from Drizzle schema
```typescript
import { chromium, Browser } from 'playwright';
import axios from 'axios';
import { Logger } from '../../../utils/logger.js';
import { PrismaClient } from '../../../../generated/prisma/client.js';
import { db } from '../../../config/database.js';
import { products, stores, categories } from '../../../db/schema.js';
import { eq, and, asc } from 'drizzle-orm';
```
### Naming Conventions
@@ -113,15 +116,19 @@ Logger.debug('Debug'); // Only when DEBUG=true
### Services Pattern
- Services receive `PrismaClient` via constructor (DI)
- Services receive `db` (Drizzle instance) via constructor (DI)
- Use `getOrCreate` for idempotent operations
- Never call Prisma directly from scrapers
- Never call Drizzle directly from scrapers
### Database Patterns
- Upsert via composite unique `(externalId, storeId)`
- Upsert via composite unique constraint on `(externalId, storeId)`
- Batch processing: 50 items per batch
- Prices: Float (rubles), converted from kopecks
- Prices: Decimal (rubles), stored as decimal type
- Use `.select().from().where()` for queries
- Use `.insert().values()` for inserts
- Use `.update().set().where()` for updates
- Use `.delete().where()` for deletes
### Comments

11
drizzle.config.ts Normal file
View File

@@ -0,0 +1,11 @@
import 'dotenv/config';
import { defineConfig } from 'drizzle-kit';
export default defineConfig({
schema: './src/db/schema.ts',
out: './drizzle',
dialect: 'postgresql',
dbCredentials: {
url: process.env.DATABASE_URL!,
},
});

View File

@@ -0,0 +1,71 @@
CREATE TABLE "categories" (
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "categories_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
"externalId" integer,
"name" varchar(255) NOT NULL,
"parentId" integer,
"description" text,
"createdAt" timestamp DEFAULT now() NOT NULL,
"updatedAt" timestamp DEFAULT now()
);
--> statement-breakpoint
CREATE TABLE "products" (
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "products_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
"externalId" varchar(50) NOT NULL,
"storeId" integer NOT NULL,
"categoryId" integer,
"name" varchar(500) NOT NULL,
"description" text,
"url" text,
"imageUrl" text,
"currentPrice" numeric(10, 2) NOT NULL,
"unit" varchar(50),
"weight" varchar(100),
"brand" varchar(255),
"oldPrice" numeric(10, 2),
"discountPercent" numeric(5, 2),
"promotionEndDate" timestamp,
"rating" numeric(3, 2),
"scoresCount" integer,
"commentsCount" integer,
"quantity" integer,
"badges" text,
"isDetailsFetched" boolean DEFAULT false NOT NULL,
"createdAt" timestamp DEFAULT now() NOT NULL,
"updatedAt" timestamp DEFAULT now(),
CONSTRAINT "products_externalId_storeId_unique" UNIQUE("externalId","storeId")
);
--> statement-breakpoint
CREATE TABLE "scraping_sessions" (
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "scraping_sessions_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
"storeId" integer NOT NULL,
"sourceType" varchar(50) NOT NULL,
"status" varchar(50) NOT NULL,
"startedAt" timestamp DEFAULT now() NOT NULL,
"finishedAt" timestamp,
"error" text
);
--> statement-breakpoint
CREATE TABLE "stores" (
"id" integer PRIMARY KEY GENERATED ALWAYS AS IDENTITY (sequence name "stores_id_seq" INCREMENT BY 1 MINVALUE 1 MAXVALUE 2147483647 START WITH 1 CACHE 1),
"name" varchar(255) NOT NULL,
"type" varchar(50) NOT NULL,
"code" varchar(50),
"url" text,
"region" varchar(255),
"address" text,
"createdAt" timestamp DEFAULT now() NOT NULL,
"updatedAt" timestamp DEFAULT now()
);
--> statement-breakpoint
ALTER TABLE "products" ADD CONSTRAINT "products_storeId_stores_id_fk" FOREIGN KEY ("storeId") REFERENCES "public"."stores"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "products" ADD CONSTRAINT "products_categoryId_categories_id_fk" FOREIGN KEY ("categoryId") REFERENCES "public"."categories"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
ALTER TABLE "scraping_sessions" ADD CONSTRAINT "scraping_sessions_storeId_stores_id_fk" FOREIGN KEY ("storeId") REFERENCES "public"."stores"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint
CREATE INDEX "categories_externalId_idx" ON "categories" USING btree ("externalId");--> statement-breakpoint
CREATE INDEX "categories_parentId_idx" ON "categories" USING btree ("parentId");--> statement-breakpoint
CREATE INDEX "products_storeId_idx" ON "products" USING btree ("storeId");--> statement-breakpoint
CREATE INDEX "products_categoryId_idx" ON "products" USING btree ("categoryId");--> statement-breakpoint
CREATE INDEX "products_externalId_idx" ON "products" USING btree ("externalId");--> statement-breakpoint
CREATE INDEX "scraping_sessions_storeId_idx" ON "scraping_sessions" USING btree ("storeId");--> statement-breakpoint
CREATE INDEX "scraping_sessions_status_idx" ON "scraping_sessions" USING btree ("status");--> statement-breakpoint
CREATE INDEX "scraping_sessions_startedAt_idx" ON "scraping_sessions" USING btree ("startedAt");--> statement-breakpoint
CREATE INDEX "stores_code_idx" ON "stores" USING btree ("code");

View File

@@ -0,0 +1,588 @@
{
"id": "0c32aa7e-a09f-4c32-a8bd-35af715f5fde",
"prevId": "00000000-0000-0000-0000-000000000000",
"version": "7",
"dialect": "postgresql",
"tables": {
"public.categories": {
"name": "categories",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"identity": {
"type": "always",
"name": "categories_id_seq",
"schema": "public",
"increment": "1",
"startWith": "1",
"minValue": "1",
"maxValue": "2147483647",
"cache": "1",
"cycle": false
}
},
"externalId": {
"name": "externalId",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"name": {
"name": "name",
"type": "varchar(255)",
"primaryKey": false,
"notNull": true
},
"parentId": {
"name": "parentId",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false
},
"createdAt": {
"name": "createdAt",
"type": "timestamp",
"primaryKey": false,
"notNull": true,
"default": "now()"
},
"updatedAt": {
"name": "updatedAt",
"type": "timestamp",
"primaryKey": false,
"notNull": false,
"default": "now()"
}
},
"indexes": {
"categories_externalId_idx": {
"name": "categories_externalId_idx",
"columns": [
{
"expression": "externalId",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
},
"categories_parentId_idx": {
"name": "categories_parentId_idx",
"columns": [
{
"expression": "parentId",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.products": {
"name": "products",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"identity": {
"type": "always",
"name": "products_id_seq",
"schema": "public",
"increment": "1",
"startWith": "1",
"minValue": "1",
"maxValue": "2147483647",
"cache": "1",
"cycle": false
}
},
"externalId": {
"name": "externalId",
"type": "varchar(50)",
"primaryKey": false,
"notNull": true
},
"storeId": {
"name": "storeId",
"type": "integer",
"primaryKey": false,
"notNull": true
},
"categoryId": {
"name": "categoryId",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"name": {
"name": "name",
"type": "varchar(500)",
"primaryKey": false,
"notNull": true
},
"description": {
"name": "description",
"type": "text",
"primaryKey": false,
"notNull": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": false
},
"imageUrl": {
"name": "imageUrl",
"type": "text",
"primaryKey": false,
"notNull": false
},
"currentPrice": {
"name": "currentPrice",
"type": "numeric(10, 2)",
"primaryKey": false,
"notNull": true
},
"unit": {
"name": "unit",
"type": "varchar(50)",
"primaryKey": false,
"notNull": false
},
"weight": {
"name": "weight",
"type": "varchar(100)",
"primaryKey": false,
"notNull": false
},
"brand": {
"name": "brand",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"oldPrice": {
"name": "oldPrice",
"type": "numeric(10, 2)",
"primaryKey": false,
"notNull": false
},
"discountPercent": {
"name": "discountPercent",
"type": "numeric(5, 2)",
"primaryKey": false,
"notNull": false
},
"promotionEndDate": {
"name": "promotionEndDate",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"rating": {
"name": "rating",
"type": "numeric(3, 2)",
"primaryKey": false,
"notNull": false
},
"scoresCount": {
"name": "scoresCount",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"commentsCount": {
"name": "commentsCount",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"quantity": {
"name": "quantity",
"type": "integer",
"primaryKey": false,
"notNull": false
},
"badges": {
"name": "badges",
"type": "text",
"primaryKey": false,
"notNull": false
},
"isDetailsFetched": {
"name": "isDetailsFetched",
"type": "boolean",
"primaryKey": false,
"notNull": true,
"default": false
},
"createdAt": {
"name": "createdAt",
"type": "timestamp",
"primaryKey": false,
"notNull": true,
"default": "now()"
},
"updatedAt": {
"name": "updatedAt",
"type": "timestamp",
"primaryKey": false,
"notNull": false,
"default": "now()"
}
},
"indexes": {
"products_storeId_idx": {
"name": "products_storeId_idx",
"columns": [
{
"expression": "storeId",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
},
"products_categoryId_idx": {
"name": "products_categoryId_idx",
"columns": [
{
"expression": "categoryId",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
},
"products_externalId_idx": {
"name": "products_externalId_idx",
"columns": [
{
"expression": "externalId",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {
"products_storeId_stores_id_fk": {
"name": "products_storeId_stores_id_fk",
"tableFrom": "products",
"tableTo": "stores",
"columnsFrom": [
"storeId"
],
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
},
"products_categoryId_categories_id_fk": {
"name": "products_categoryId_categories_id_fk",
"tableFrom": "products",
"tableTo": "categories",
"columnsFrom": [
"categoryId"
],
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {
"products_externalId_storeId_unique": {
"name": "products_externalId_storeId_unique",
"nullsNotDistinct": false,
"columns": [
"externalId",
"storeId"
]
}
},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.scraping_sessions": {
"name": "scraping_sessions",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"identity": {
"type": "always",
"name": "scraping_sessions_id_seq",
"schema": "public",
"increment": "1",
"startWith": "1",
"minValue": "1",
"maxValue": "2147483647",
"cache": "1",
"cycle": false
}
},
"storeId": {
"name": "storeId",
"type": "integer",
"primaryKey": false,
"notNull": true
},
"sourceType": {
"name": "sourceType",
"type": "varchar(50)",
"primaryKey": false,
"notNull": true
},
"status": {
"name": "status",
"type": "varchar(50)",
"primaryKey": false,
"notNull": true
},
"startedAt": {
"name": "startedAt",
"type": "timestamp",
"primaryKey": false,
"notNull": true,
"default": "now()"
},
"finishedAt": {
"name": "finishedAt",
"type": "timestamp",
"primaryKey": false,
"notNull": false
},
"error": {
"name": "error",
"type": "text",
"primaryKey": false,
"notNull": false
}
},
"indexes": {
"scraping_sessions_storeId_idx": {
"name": "scraping_sessions_storeId_idx",
"columns": [
{
"expression": "storeId",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
},
"scraping_sessions_status_idx": {
"name": "scraping_sessions_status_idx",
"columns": [
{
"expression": "status",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
},
"scraping_sessions_startedAt_idx": {
"name": "scraping_sessions_startedAt_idx",
"columns": [
{
"expression": "startedAt",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {
"scraping_sessions_storeId_stores_id_fk": {
"name": "scraping_sessions_storeId_stores_id_fk",
"tableFrom": "scraping_sessions",
"tableTo": "stores",
"columnsFrom": [
"storeId"
],
"columnsTo": [
"id"
],
"onDelete": "no action",
"onUpdate": "no action"
}
},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
},
"public.stores": {
"name": "stores",
"schema": "",
"columns": {
"id": {
"name": "id",
"type": "integer",
"primaryKey": true,
"notNull": true,
"identity": {
"type": "always",
"name": "stores_id_seq",
"schema": "public",
"increment": "1",
"startWith": "1",
"minValue": "1",
"maxValue": "2147483647",
"cache": "1",
"cycle": false
}
},
"name": {
"name": "name",
"type": "varchar(255)",
"primaryKey": false,
"notNull": true
},
"type": {
"name": "type",
"type": "varchar(50)",
"primaryKey": false,
"notNull": true
},
"code": {
"name": "code",
"type": "varchar(50)",
"primaryKey": false,
"notNull": false
},
"url": {
"name": "url",
"type": "text",
"primaryKey": false,
"notNull": false
},
"region": {
"name": "region",
"type": "varchar(255)",
"primaryKey": false,
"notNull": false
},
"address": {
"name": "address",
"type": "text",
"primaryKey": false,
"notNull": false
},
"createdAt": {
"name": "createdAt",
"type": "timestamp",
"primaryKey": false,
"notNull": true,
"default": "now()"
},
"updatedAt": {
"name": "updatedAt",
"type": "timestamp",
"primaryKey": false,
"notNull": false,
"default": "now()"
}
},
"indexes": {
"stores_code_idx": {
"name": "stores_code_idx",
"columns": [
{
"expression": "code",
"isExpression": false,
"asc": true,
"nulls": "last"
}
],
"isUnique": false,
"concurrently": false,
"method": "btree",
"with": {}
}
},
"foreignKeys": {},
"compositePrimaryKeys": {},
"uniqueConstraints": {},
"policies": {},
"checkConstraints": {},
"isRLSEnabled": false
}
},
"enums": {},
"schemas": {},
"sequences": {},
"roles": {},
"policies": {},
"views": {},
"_meta": {
"columns": {},
"schemas": {},
"tables": {}
}
}

View File

@@ -0,0 +1,13 @@
{
"version": "7",
"dialect": "postgresql",
"entries": [
{
"idx": 0,
"version": "7",
"when": 1769461513369,
"tag": "0000_common_mole_man",
"breakpoints": true
}
]
}

View File

@@ -10,10 +10,10 @@
"dev": "tsx src/scripts/scrape-magnit-products.ts",
"enrich": "tsx src/scripts/enrich-product-details.ts",
"test-db": "tsx src/scripts/test-db-connection.ts",
"prisma:generate": "prisma generate",
"prisma:migrate": "prisma migrate dev",
"prisma:studio": "prisma studio --config=prisma.config.ts",
"prisma:format": "prisma format"
"db:generate": "drizzle-kit generate",
"db:migrate": "drizzle-kit migrate",
"db:push": "drizzle-kit push",
"db:studio": "drizzle-kit studio"
},
"keywords": [
"scraper",
@@ -23,17 +23,16 @@
"author": "",
"license": "ISC",
"dependencies": {
"@prisma/adapter-pg": "^7.2.0",
"@prisma/client": "^7.2.0",
"axios": "^1.13.2",
"dotenv": "^17.2.3",
"drizzle-orm": "^0.45.1",
"pg": "^8.16.3",
"playwright": "^1.57.0"
},
"devDependencies": {
"@types/node": "^25.0.3",
"@types/pg": "^8.16.0",
"prisma": "^7.2.0",
"drizzle-kit": "^0.31.8",
"ts-node": "^10.9.2",
"tsx": "^4.21.0",
"typescript": "^5.9.3"

1386
pnpm-lock.yaml generated

File diff suppressed because it is too large Load Diff

View File

@@ -1,12 +0,0 @@
import 'dotenv/config'
import { defineConfig } from 'prisma/config'
export default defineConfig({
schema: 'src/database/prisma/schema.prisma',
migrations: {
path: 'src/database/prisma/migrations',
},
datasource: {
url: process.env.DATABASE_URL,
},
})

View File

@@ -1,15 +1,12 @@
import "dotenv/config";
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '../../generated/prisma/client.js';
import 'dotenv/config';
import { db } from '../database/client.js';
import { stores } from '../db/schema.js';
const connectionString = `${process.env.DATABASE_URL}`;
const adapter = new PrismaPg({ connectionString });
export const prisma = new PrismaClient({ adapter });
export { db };
export async function connectDatabase() {
try {
await prisma.$connect();
await db.select().from(stores).limit(1);
console.log('✅ Подключение к базе данных установлено');
} catch (error) {
console.error('❌ Ошибка подключения к базе данных:', error);
@@ -18,7 +15,6 @@ export async function connectDatabase() {
}
export async function disconnectDatabase() {
await prisma.$disconnect();
await db.$client.end();
console.log('✅ Отключение от базы данных');
}

View File

@@ -1,11 +1,12 @@
import "dotenv/config";
import { PrismaPg } from '@prisma/adapter-pg';
import { PrismaClient } from '../../generated/prisma/client.js';
import 'dotenv/config';
import { drizzle } from 'drizzle-orm/node-postgres';
import { Pool } from 'pg';
import * as schema from '../db/schema.js';
const connectionString = `${process.env.DATABASE_URL}`;
const connectionString = process.env.DATABASE_URL;
const adapter = new PrismaPg({ connectionString });
export const prisma = new PrismaClient({ adapter });
const pool = new Pool({ connectionString });
export default prisma;
export const db = drizzle(pool, { schema });
export default db;

View File

@@ -1,104 +0,0 @@
// This is your Prisma schema file,
// learn more about it in the docs: https://pris.ly/d/prisma-schema
generator client {
provider = "prisma-client"
output = "../../../generated/prisma"
}
datasource db {
provider = "postgresql"
}
model Store {
id Int @id @default(autoincrement())
name String
type String // "web" | "app"
code String? // storeCode для API (например "992301")
url String?
region String?
address String?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
products Product[]
sessions ScrapingSession[]
@@index([code])
}
model Category {
id Int @id @default(autoincrement())
externalId Int? // ID из внешнего API
name String
parentId Int?
parent Category? @relation("CategoryHierarchy", fields: [parentId], references: [id])
children Category[] @relation("CategoryHierarchy")
description String?
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
products Product[]
@@index([externalId])
@@index([parentId])
}
model Product {
id Int @id @default(autoincrement())
externalId String // ID из API (например "1000300796")
storeId Int
categoryId Int?
name String
description String?
url String?
imageUrl String?
currentPrice Decimal @db.Decimal(10, 2)
unit String? // единица измерения
weight String? // вес/объем
brand String?
// Промо-информация
oldPrice Decimal? @db.Decimal(10, 2) // старая цена при акции
discountPercent Decimal? @db.Decimal(5, 2) // процент скидки
promotionEndDate DateTime? // дата окончания акции
// Рейтинги
rating Decimal? @db.Decimal(3, 2) // рейтинг товара
scoresCount Int? // количество оценок
commentsCount Int? // количество комментариев
// Остаток и бейджи
quantity Int? // остаток на складе
badges String? // массив бейджей в формате JSON
// Детальная информация
isDetailsFetched Boolean @default(false) // были ли получены детали через detail endpoint
createdAt DateTime @default(now())
updatedAt DateTime @updatedAt
store Store @relation(fields: [storeId], references: [id])
category Category? @relation(fields: [categoryId], references: [id])
@@unique([externalId, storeId])
@@index([storeId])
@@index([categoryId])
@@index([externalId])
}
model ScrapingSession {
id Int @id @default(autoincrement())
storeId Int
sourceType String // "api" | "web" | "app"
status String // "pending" | "running" | "completed" | "failed"
startedAt DateTime @default(now())
finishedAt DateTime?
error String?
store Store @relation(fields: [storeId], references: [id])
@@index([storeId])
@@index([status])
@@index([startedAt])
}

73
src/db/schema.ts Normal file
View File

@@ -0,0 +1,73 @@
import { pgTable, integer, varchar, text, decimal, timestamp, index, unique, boolean } from 'drizzle-orm/pg-core';
export const stores = pgTable('stores', {
id: integer().primaryKey().generatedAlwaysAsIdentity(),
name: varchar({ length: 255 }).notNull(),
type: varchar({ length: 50 }).notNull(),
code: varchar({ length: 50 }),
url: text(),
region: varchar({ length: 255 }),
address: text(),
createdAt: timestamp().defaultNow().notNull(),
updatedAt: timestamp().defaultNow(),
}, (table) => [
index('stores_code_idx').on(table.code),
]);
export const categories = pgTable('categories', {
id: integer().primaryKey().generatedAlwaysAsIdentity(),
externalId: integer(),
name: varchar({ length: 255 }).notNull(),
parentId: integer(),
description: text(),
createdAt: timestamp().defaultNow().notNull(),
updatedAt: timestamp().defaultNow(),
}, (table) => [
index('categories_externalId_idx').on(table.externalId),
index('categories_parentId_idx').on(table.parentId),
]);
export const products = pgTable('products', {
id: integer().primaryKey().generatedAlwaysAsIdentity(),
externalId: varchar({ length: 50 }).notNull(),
storeId: integer().notNull().references(() => stores.id),
categoryId: integer().references(() => categories.id),
name: varchar({ length: 500 }).notNull(),
description: text(),
url: text(),
imageUrl: text(),
currentPrice: decimal({ precision: 10, scale: 2 }).notNull(),
unit: varchar({ length: 50 }),
weight: varchar({ length: 100 }),
brand: varchar({ length: 255 }),
oldPrice: decimal({ precision: 10, scale: 2 }),
discountPercent: decimal({ precision: 5, scale: 2 }),
promotionEndDate: timestamp(),
rating: decimal({ precision: 3, scale: 2 }),
scoresCount: integer(),
commentsCount: integer(),
quantity: integer(),
badges: text(),
isDetailsFetched: boolean().default(false).notNull(),
createdAt: timestamp().defaultNow().notNull(),
updatedAt: timestamp().defaultNow(),
}, (table) => [
unique('products_externalId_storeId_unique').on(table.externalId, table.storeId),
index('products_storeId_idx').on(table.storeId),
index('products_categoryId_idx').on(table.categoryId),
index('products_externalId_idx').on(table.externalId),
]);
export const scrapingSessions = pgTable('scraping_sessions', {
id: integer().primaryKey().generatedAlwaysAsIdentity(),
storeId: integer().notNull().references(() => stores.id),
sourceType: varchar({ length: 50 }).notNull(),
status: varchar({ length: 50 }).notNull(),
startedAt: timestamp().defaultNow().notNull(),
finishedAt: timestamp(),
error: text(),
}, (table) => [
index('scraping_sessions_storeId_idx').on(table.storeId),
index('scraping_sessions_status_idx').on(table.status),
index('scraping_sessions_startedAt_idx').on(table.startedAt),
]);

View File

@@ -13,7 +13,6 @@ import {
} from './types.js';
import { ProductService } from '../../../services/product/ProductService.js';
import { ProductParser } from '../../../services/parser/ProductParser.js';
import { PrismaClient } from '../../../../generated/prisma/client.js';
import { withRetryAndReinit } from '../../../utils/retry.js';
export interface MagnitScraperConfig {
@@ -510,12 +509,12 @@ export class MagnitApiScraper {
*/
async saveToDatabase(
products: ProductItem[],
prisma: PrismaClient
drizzleDb: any
): Promise<number> {
try {
Logger.info(`Начало сохранения ${products.length} товаров в БД...`);
const productService = new ProductService(prisma);
const productService = new ProductService(drizzleDb);
// Получаем или создаем магазин
const store = await productService.getOrCreateStore(
@@ -558,13 +557,13 @@ export class MagnitApiScraper {
* Более эффективно для больших каталогов
*/
async saveToDatabaseStreaming(
prisma: PrismaClient,
drizzleDb: any,
options: {
batchSize?: number; // default: 50
maxProducts?: number;
} = {}
): Promise<number> {
const productService = new ProductService(prisma);
const productService = new ProductService(drizzleDb);
let totalSaved = 0;
// Получаем магазин один раз в начале

View File

@@ -1,93 +1,46 @@
import 'dotenv/config';
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
import { connectDatabase, disconnectDatabase, db } from '../config/database.js';
import { products, categories } from '../db/schema.js';
import { isNull, and, not } from 'drizzle-orm';
import { Logger } from '../utils/logger.js';
async function main() {
try {
await connectDatabase();
// Check total products and null categoryId count
const totalProducts = await prisma.product.count();
const nullCategoryCount = await prisma.product.count({
where: { categoryId: null }
});
const withCategoryCount = await prisma.product.count({
where: { categoryId: { not: null } }
});
const allProducts = await db.select().from(products);
const totalProducts = allProducts.length;
const nullCategoryCount = allProducts.filter(p => p.categoryId === null).length;
const withCategoryCount = totalProducts - nullCategoryCount;
Logger.info('\n📊 СТАТИСТИКА ПО КАТЕГОРИЯМ:');
Logger.info(`Всего товаров: ${totalProducts}`);
Logger.info(`Товаров без категории (null): ${nullCategoryCount} (${((nullCategoryCount / totalProducts) * 100).toFixed(2)}%)`);
Logger.info(`Товаров с категорией: ${withCategoryCount} (${((withCategoryCount / totalProducts) * 100).toFixed(2)}%)`);
// Check total categories
const totalCategories = await prisma.category.count();
Logger.info(`\nВсего категорий в БД: ${totalCategories}`);
const allCategories = await db.select().from(categories);
Logger.info(`\nВсего категорий в БД: ${allCategories.length}`);
// Sample categories
if (totalCategories > 0) {
const sampleCategories = await prisma.category.findMany({
take: 5,
select: {
id: true,
externalId: true,
name: true,
_count: {
select: { products: true }
}
}
});
if (allCategories.length > 0) {
const sampleCategories = allCategories.slice(0, 5);
Logger.info('\n📁 Примеры категорий:');
sampleCategories.forEach(cat => {
Logger.info(` - [${cat.externalId}] ${cat.name} (товаров: ${cat._count.products})`);
sampleCategories.forEach((cat: any) => {
const productsCount = allProducts.filter(p => p.categoryId === cat.id).length;
Logger.info(` - [${cat.externalId}] ${cat.name} (товаров: ${productsCount})`);
});
}
// Sample products without categories
const productsWithoutCategory = await prisma.product.findMany({
where: { categoryId: null },
take: 5,
select: {
id: true,
externalId: true,
name: true,
currentPrice: true
}
});
Logger.info('\n❌ Примеры товаров БЕЗ категории:');
productsWithoutCategory.forEach(p => {
Logger.info(` - [${p.externalId}] ${p.name} (₽${p.currentPrice})`);
});
// Sample products with categories
const productsWithCategory = await prisma.product.findMany({
where: { categoryId: { not: null } },
take: 5,
select: {
id: true,
externalId: true,
name: true,
currentPrice: true,
category: {
select: {
externalId: true,
name: true
}
}
}
});
if (productsWithCategory.length > 0) {
Logger.info('\n✅ Примеры товаров С категорией:');
productsWithCategory.forEach(p => {
Logger.info(` - [${p.externalId}] ${p.name} → [${p.category?.externalId}] ${p.category?.name}`);
const productsWithoutCategory = allProducts.filter(p => p.categoryId === null).slice(0, 5);
if (productsWithoutCategory.length > 0) {
Logger.info('\n📦 Примеры товаров без категории:');
productsWithoutCategory.forEach((p: any) => {
Logger.info(` - [${p.externalId}] ${p.name}`);
});
}
} catch (error) {
Logger.error('❌ Ошибка при анализе:', error);
Logger.error('❌ Ошибка:', error);
process.exit(1);
} finally {
await disconnectDatabase();

View File

@@ -1,30 +1,17 @@
import 'dotenv/config';
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
import { connectDatabase, disconnectDatabase, db } from '../config/database.js';
import { products } from '../db/schema.js';
import { Logger } from '../utils/logger.js';
async function main() {
try {
await connectDatabase();
// Get a sample product with all fields
const product = await prisma.product.findFirst({
select: {
id: true,
externalId: true,
name: true,
description: true,
currentPrice: true,
unit: true,
weight: true,
brand: true,
categoryId: true,
badges: true,
}
});
const result = await db.select().from(products).limit(1);
if (product) {
if (result.length > 0) {
Logger.info('=== ДЕТАЛИ ТОВАРА ИЗ БД ===');
Logger.info(JSON.stringify(product, null, 2));
Logger.info(JSON.stringify(result[0], null, 2));
}
} catch (error) {

View File

@@ -1,5 +1,5 @@
import 'dotenv/config';
import { prisma } from '../config/database.js';
import { db } from '../config/database.js';
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
import { ProductService } from '../services/product/ProductService.js';
import { ProductParser } from '../services/parser/ProductParser.js';
@@ -33,7 +33,7 @@ async function main() {
}
// Инициализация
const productService = new ProductService(prisma);
const productService = new ProductService(db);
const scraper = new MagnitApiScraper({
storeCode,
rateLimitDelay: 300,

View File

@@ -1,6 +1,6 @@
import 'dotenv/config';
import { MagnitApiScraper } from '../scrapers/api/magnit/MagnitApiScraper.js';
import { connectDatabase, disconnectDatabase, prisma } from '../config/database.js';
import { connectDatabase, disconnectDatabase, db } from '../config/database.js';
import { Logger } from '../utils/logger.js';
async function main() {
@@ -64,7 +64,7 @@ async function main() {
// STREAMING режим (рекомендуется для больших каталогов)
Logger.info('📡 Использование потокового режима скрапинга');
saved = await scraper.saveToDatabaseStreaming(prisma, {
saved = await scraper.saveToDatabaseStreaming(db, {
batchSize: 50,
maxProducts,
});
@@ -77,7 +77,7 @@ async function main() {
Logger.info(`📦 Получено товаров: ${products.length}`);
if (products.length > 0) {
saved = await scraper.saveToDatabase(products, prisma);
saved = await scraper.saveToDatabase(products, db);
} else {
Logger.warn('⚠️ Товары не найдены');
}

View File

@@ -1,4 +1,6 @@
import { PrismaClient, Product, Store, Category } from '../../../generated/prisma/client.js';
import { db } from '../../database/client.js';
import { products, stores, categories } from '../../db/schema.js';
import { eq, and, asc } from 'drizzle-orm';
import { Logger } from '../../utils/logger.js';
import { DatabaseError } from '../../utils/errors.js';
@@ -25,64 +27,124 @@ export interface CreateProductData {
isDetailsFetched?: boolean;
}
export class ProductService {
constructor(private prisma: PrismaClient) {}
export interface Product {
id: number;
externalId: string;
storeId: number;
categoryId: number | null;
name: string;
description: string | null;
url: string | null;
imageUrl: string | null;
currentPrice: string;
unit: string | null;
weight: string | null;
brand: string | null;
oldPrice: string | null;
discountPercent: string | null;
promotionEndDate: Date | null;
rating: string | null;
scoresCount: number | null;
commentsCount: number | null;
quantity: number | null;
badges: string | null;
isDetailsFetched: boolean;
createdAt: Date;
updatedAt: Date | null;
}
export interface Store {
id: number;
name: string;
type: string;
code: string | null;
url: string | null;
region: string | null;
address: string | null;
createdAt: Date;
updatedAt: Date | null;
}
export interface Category {
id: number;
externalId: number | null;
name: string;
parentId: number | null;
description: string | null;
createdAt: Date;
updatedAt: Date | null;
}
export class ProductService {
constructor(private drizzleDb: typeof db) {}
private get db() {
return this.drizzleDb;
}
/**
* Сохранение одного товара
*/
async saveProduct(data: CreateProductData): Promise<Product> {
try {
// Проверяем, существует ли товар
const existing = await this.prisma.product.findUnique({
where: {
externalId_storeId: {
externalId: data.externalId,
storeId: data.storeId,
},
},
});
const existing = await this.db
.select()
.from(products)
.where(
and(
eq(products.externalId, data.externalId),
eq(products.storeId, data.storeId)
)
)
.limit(1);
if (existing) {
// Обновляем существующий товар
if (existing.length > 0) {
Logger.debug(`Обновление товара: ${data.externalId}`);
// Если isDetailsFetched не передан явно, сохраняем текущее значение
const updateData: any = {
name: data.name,
description: data.description,
url: data.url,
imageUrl: data.imageUrl,
currentPrice: data.currentPrice,
currentPrice: data.currentPrice.toString(),
unit: data.unit,
weight: data.weight,
brand: data.brand,
oldPrice: data.oldPrice,
discountPercent: data.discountPercent,
oldPrice: data.oldPrice?.toString(),
discountPercent: data.discountPercent?.toString(),
promotionEndDate: data.promotionEndDate,
rating: data.rating,
rating: data.rating?.toString(),
scoresCount: data.scoresCount,
commentsCount: data.commentsCount,
quantity: data.quantity,
badges: data.badges,
categoryId: data.categoryId,
updatedAt: new Date(),
};
// Обновляем isDetailsFetched только если передано явно
if (data.isDetailsFetched !== undefined) {
updateData.isDetailsFetched = data.isDetailsFetched;
}
return await this.prisma.product.update({
where: { id: existing.id },
data: updateData,
});
const result = await this.db
.update(products)
.set(updateData)
.where(eq(products.id, existing[0].id))
.returning();
return result[0] as Product;
} else {
// Создаем новый товар
Logger.debug(`Создание нового товара: ${data.externalId}`);
return await this.prisma.product.create({
data,
});
const result = await this.db
.insert(products)
.values({
...data,
currentPrice: data.currentPrice.toString(),
oldPrice: data.oldPrice?.toString(),
discountPercent: data.discountPercent?.toString(),
rating: data.rating?.toString(),
updatedAt: new Date(),
})
.returning();
return result[0] as Product;
}
} catch (error) {
Logger.error('Ошибка сохранения товара:', error);
@@ -93,22 +155,18 @@ export class ProductService {
}
}
/**
* Сохранение нескольких товаров батчами
*/
async saveProducts(products: CreateProductData[]): Promise<number> {
async saveProducts(productsData: CreateProductData[]): Promise<number> {
try {
Logger.info(`Сохранение ${products.length} товаров...`);
Logger.info(`Сохранение ${productsData.length} товаров...`);
let saved = 0;
// Обрабатываем батчами по 50 товаров
const batchSize = 50;
for (let i = 0; i < products.length; i += batchSize) {
const batch = products.slice(i, i + batchSize);
for (let i = 0; i < productsData.length; i += batchSize) {
const batch = productsData.slice(i, i + batchSize);
const promises = batch.map(product => this.saveProduct(product));
await Promise.all(promises);
saved += batch.length;
Logger.info(`Сохранено товаров: ${saved}/${products.length}`);
Logger.info(`Сохранено товаров: ${saved}/${productsData.length}`);
}
Logger.info(`Всего сохранено товаров: ${saved}`);
@@ -119,22 +177,23 @@ export class ProductService {
}
}
/**
* Поиск товара по externalId и storeId
*/
async findByExternalId(
externalId: string,
storeId: number
): Promise<Product | null> {
try {
return await this.prisma.product.findUnique({
where: {
externalId_storeId: {
externalId,
storeId,
},
},
});
const result = await this.db
.select()
.from(products)
.where(
and(
eq(products.externalId, externalId),
eq(products.storeId, storeId)
)
)
.limit(1);
return result.length > 0 ? (result[0] as Product) : null;
} catch (error) {
Logger.error('Ошибка поиска товара:', error);
throw new DatabaseError(
@@ -144,30 +203,33 @@ export class ProductService {
}
}
/**
* Получение или создание магазина
*/
async getOrCreateStore(
code: string,
name: string = 'Магнит'
): Promise<Store> {
try {
let store = await this.prisma.store.findFirst({
where: { code },
});
const existing = await this.db
.select()
.from(stores)
.where(eq(stores.code, code))
.limit(1);
if (!store) {
Logger.info(`Создание нового магазина: ${code}`);
store = await this.prisma.store.create({
data: {
name,
type: 'web',
code,
},
});
if (existing.length > 0) {
return existing[0] as Store;
}
return store;
Logger.info(`Создание нового магазина: ${code}`);
const result = await this.db
.insert(stores)
.values({
name,
type: 'web',
code,
updatedAt: new Date(),
})
.returning();
return result[0] as Store;
} catch (error) {
Logger.error('Ошибка получения/создания магазина:', error);
throw new DatabaseError(
@@ -177,35 +239,41 @@ export class ProductService {
}
}
/**
* Получение или создание категории
*/
async getOrCreateCategory(
externalId: number,
name: string
): Promise<Category> {
try {
let category = await this.prisma.category.findFirst({
where: { externalId },
});
const existing = await this.db
.select()
.from(categories)
.where(eq(categories.externalId, externalId))
.limit(1);
if (!category) {
Logger.info(`Создание новой категории: ${name} (${externalId})`);
category = await this.prisma.category.create({
data: {
externalId,
name,
},
});
} else if (category.name !== name) {
// Обновляем название категории, если изменилось
category = await this.prisma.category.update({
where: { id: category.id },
data: { name },
});
if (existing.length > 0) {
const category = existing[0] as Category;
if (category.name !== name) {
const result = await this.db
.update(categories)
.set({ name, updatedAt: new Date() })
.where(eq(categories.id, category.id))
.returning();
return result[0] as Category;
}
return category;
}
return category;
Logger.info(`Создание новой категории: ${name} (${externalId})`);
const result = await this.db
.insert(categories)
.values({
externalId,
name,
updatedAt: new Date(),
})
.returning();
return result[0] as Category;
} catch (error) {
Logger.error('Ошибка получения/создания категории:', error);
throw new DatabaseError(
@@ -215,30 +283,33 @@ export class ProductService {
}
}
/**
* Получение товаров, для которых не были получены детали
*/
async getProductsNeedingDetails(storeCode: string, limit?: number): Promise<Product[]> {
try {
// Сначала находим store по code
const store = await this.prisma.store.findFirst({
where: { code: storeCode },
});
const storeResult = await this.db
.select()
.from(stores)
.where(eq(stores.code, storeCode))
.limit(1);
if (!store) {
if (storeResult.length === 0) {
throw new DatabaseError(`Магазин с кодом ${storeCode} не найден`);
}
return await this.prisma.product.findMany({
where: {
storeId: store.id,
isDetailsFetched: false,
},
take: limit,
orderBy: {
id: 'asc',
},
});
const store = storeResult[0];
const result = await this.db
.select()
.from(products)
.where(
and(
eq(products.storeId, store.id),
eq(products.isDetailsFetched, false)
)
)
.orderBy(asc(products.id))
.limit(limit || 1000);
return result as Product[];
} catch (error) {
Logger.error('Ошибка получения товаров для обогащения:', error);
throw new DatabaseError(
@@ -248,9 +319,6 @@ export class ProductService {
}
}
/**
* Обновление деталей товара (бренд, описание, вес, единица измерения, рейтинг)
*/
async updateProductDetails(
externalId: string,
storeId: number,
@@ -267,18 +335,23 @@ export class ProductService {
}
): Promise<Product> {
try {
return await this.prisma.product.update({
where: {
externalId_storeId: {
externalId,
storeId,
},
},
data: {
const result = await this.db
.update(products)
.set({
...details,
rating: details.rating?.toString(),
isDetailsFetched: true,
},
});
updatedAt: new Date(),
})
.where(
and(
eq(products.externalId, externalId),
eq(products.storeId, storeId)
)
)
.returning();
return result[0] as Product;
} catch (error) {
Logger.error('Ошибка обновления деталей товара:', error);
throw new DatabaseError(
@@ -288,22 +361,23 @@ export class ProductService {
}
}
/**
* Отметить товар как обработанный (даже если детали не были получены)
*/
async markAsDetailsFetched(externalId: string, storeId: number): Promise<Product> {
try {
return await this.prisma.product.update({
where: {
externalId_storeId: {
externalId,
storeId,
},
},
data: {
const result = await this.db
.update(products)
.set({
isDetailsFetched: true,
},
});
updatedAt: new Date(),
})
.where(
and(
eq(products.externalId, externalId),
eq(products.storeId, storeId)
)
)
.returning();
return result[0] as Product;
} catch (error) {
Logger.error('Ошибка отметки товара как обработанного:', error);
throw new DatabaseError(
@@ -313,4 +387,3 @@ export class ProductService {
}
}
}