first commit
modules/storage.py | 272 lines | Normal file
@@ -0,0 +1,272 @@
# modules/storage.py
"""
Module for working with the data store.
"""

import sqlite3
import json
import logging
from datetime import datetime
from pathlib import Path


class StorageManager:
    """Manager for working with the database."""

    def __init__(self, config):
        self.config = config
        self.logger = logging.getLogger(__name__)

        # Database initialization
        self.db_type = config.get('database.type', 'sqlite')

        if self.db_type == 'sqlite':
            self.db_path = config.get('database.sqlite_path', 'data/morele_parser.db')
            Path(self.db_path).parent.mkdir(parents=True, exist_ok=True)
            self._init_sqlite()
        else:
            raise NotImplementedError("Only SQLite is supported for now")

    def _init_sqlite(self):
        """Initializes the SQLite database."""
        with sqlite3.connect(self.db_path) as conn:
            conn.executescript("""
                CREATE TABLE IF NOT EXISTS products (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    external_id TEXT UNIQUE NOT NULL,
                    url TEXT NOT NULL,
                    title TEXT NOT NULL,
                    title_ua TEXT,
                    price REAL NOT NULL,
                    availability TEXT,
                    description TEXT,
                    description_ua TEXT,
                    attributes TEXT,
                    attributes_ua TEXT,
                    category TEXT,
                    brand TEXT,
                    model TEXT,
                    sku TEXT,
                    images TEXT,
                    local_images TEXT,
                    content_hash TEXT,
                    is_translated BOOLEAN DEFAULT 0,
                    is_active BOOLEAN DEFAULT 1,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                    updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );

                CREATE TABLE IF NOT EXISTS categories (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    name TEXT NOT NULL,
                    url TEXT UNIQUE NOT NULL,
                    is_active BOOLEAN DEFAULT 1,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );

                CREATE TABLE IF NOT EXISTS translation_cache (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    original_text TEXT UNIQUE NOT NULL,
                    translated_text TEXT NOT NULL,
                    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
                );

                CREATE TABLE IF NOT EXISTS parsing_logs (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    category_url TEXT,
                    products_found INTEGER,
                    products_new INTEGER,
                    products_updated INTEGER,
                    errors_count INTEGER,
                    started_at TIMESTAMP,
                    completed_at TIMESTAMP
                );

                CREATE INDEX IF NOT EXISTS idx_products_external_id ON products(external_id);
                CREATE INDEX IF NOT EXISTS idx_products_url ON products(url);
                CREATE INDEX IF NOT EXISTS idx_translation_cache_original ON translation_cache(original_text);
            """)

    def save_product(self, product):
        """Saves a product to the database."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO products (
                    external_id, url, title, title_ua, price, availability,
                    description, description_ua, attributes, attributes_ua,
                    category, brand, model, sku, images, local_images,
                    content_hash, is_translated, updated_at
                ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
            """, (
                product['id'],
                product['url'],
                product['title'],
                product.get('title_ua', ''),
                product['price'],
                product['availability'],
                product['description'],
                product.get('description_ua', ''),
                json.dumps(product.get('attributes', {}), ensure_ascii=False),
                json.dumps(product.get('attributes_ua', {}), ensure_ascii=False),
                product.get('category', ''),
                product.get('brand', ''),
                product.get('model', ''),
                product.get('sku', ''),
                json.dumps(product.get('images', [])),
                json.dumps(product.get('local_images', [])),
                product.get('content_hash', ''),
                product.get('is_translated', False),
                datetime.now().isoformat()
            ))

    def get_product_by_url(self, url):
        """Gets a product by URL."""
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute("SELECT * FROM products WHERE url = ?", (url,))
            row = cursor.fetchone()

            if row:
                product = dict(row)
                product['attributes'] = json.loads(product['attributes'] or '{}')
                product['attributes_ua'] = json.loads(product['attributes_ua'] or '{}')
                product['images'] = json.loads(product['images'] or '[]')
                product['local_images'] = json.loads(product['local_images'] or '[]')
                return product

            return None

    def get_product_by_id(self, product_id):
        """Gets a product by ID."""
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute("SELECT * FROM products WHERE id = ?", (product_id,))
            row = cursor.fetchone()

            if row:
                product = dict(row)
                product['attributes'] = json.loads(product['attributes'] or '{}')
                product['attributes_ua'] = json.loads(product['attributes_ua'] or '{}')
                product['images'] = json.loads(product['images'] or '[]')
                product['local_images'] = json.loads(product['local_images'] or '[]')
                return product

            return None

    def update_product(self, product_id, product_data):
        """Updates a product."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                UPDATE products SET
                    title = ?, title_ua = ?, price = ?, availability = ?,
                    description = ?, description_ua = ?, attributes = ?, attributes_ua = ?,
                    category = ?, brand = ?, model = ?, sku = ?, images = ?, local_images = ?,
                    content_hash = ?, is_translated = ?, updated_at = ?
                WHERE id = ?
            """, (
                product_data['title'],
                product_data.get('title_ua', ''),
                product_data['price'],
                product_data['availability'],
                product_data['description'],
                product_data.get('description_ua', ''),
                json.dumps(product_data.get('attributes', {}), ensure_ascii=False),
                json.dumps(product_data.get('attributes_ua', {}), ensure_ascii=False),
                product_data.get('category', ''),
                product_data.get('brand', ''),
                product_data.get('model', ''),
                product_data.get('sku', ''),
                json.dumps(product_data.get('images', [])),
                json.dumps(product_data.get('local_images', [])),
                product_data.get('content_hash', ''),
                product_data.get('is_translated', False),
                datetime.now().isoformat(),
                product_id
            ))

    def get_active_categories(self):
        """Gets the list of active categories for parsing."""
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute("SELECT * FROM categories WHERE is_active = 1")
            return [dict(row) for row in cursor.fetchall()]

    def add_category(self, name, url):
        """Adds a category."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO categories (name, url) VALUES (?, ?)
            """, (name, url))

    def deactivate_category(self, category_id):
        """Deactivates a category."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("UPDATE categories SET is_active = 0 WHERE id = ?", (category_id,))

    def get_translation_from_cache(self, original_text):
        """Gets a translation from the cache."""
        with sqlite3.connect(self.db_path) as conn:
            cursor = conn.execute(
                "SELECT translated_text FROM translation_cache WHERE original_text = ?",
                (original_text,)
            )
            row = cursor.fetchone()
            return row[0] if row else None

    def save_translation_to_cache(self, original_text, translated_text):
        """Saves a translation to the cache."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT OR REPLACE INTO translation_cache (original_text, translated_text)
                VALUES (?, ?)
            """, (original_text, translated_text))

    def get_products_for_feed(self):
        """Gets products for feed generation."""
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            cursor = conn.execute("""
                SELECT * FROM products
                WHERE is_active = 1 AND is_translated = 1 AND price > 0
                ORDER BY updated_at DESC
            """)

            products = []
            for row in cursor.fetchall():
                product = dict(row)
                product['attributes'] = json.loads(product['attributes'] or '{}')
                product['attributes_ua'] = json.loads(product['attributes_ua'] or '{}')
                product['images'] = json.loads(product['images'] or '[]')
                product['local_images'] = json.loads(product['local_images'] or '[]')
                products.append(product)

            return products

    def log_parsing_session(self, category_url, stats):
        """Logs a parsing session."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("""
                INSERT INTO parsing_logs
                (category_url, products_found, products_new, products_updated, errors_count, started_at, completed_at)
                VALUES (?, ?, ?, ?, ?, ?, ?)
            """, (
                category_url,
                stats.get('found', 0),
                stats.get('new', 0),
                stats.get('updated', 0),
                stats.get('errors', 0),
                stats.get('started_at'),
                stats.get('completed_at')
            ))

    def get_parsing_stats(self, days=30):
        """Gets parsing statistics for the last `days` days."""
        with sqlite3.connect(self.db_path) as conn:
            conn.row_factory = sqlite3.Row
            # Bind the interval as a parameter instead of formatting it into the SQL string
            cursor = conn.execute("""
                SELECT * FROM parsing_logs
                WHERE completed_at > datetime('now', ?)
                ORDER BY completed_at DESC
            """, ('-{} days'.format(int(days)),))

            return [dict(row) for row in cursor.fetchall()]
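
A minimal usage sketch follows, assuming a config object that exposes get(key, default) with dotted keys, as the lookups in __init__ imply; the DictConfig helper, sample keys, and sample product values are illustrative, not part of this commit.

# example_usage.py (illustrative sketch, not part of this commit)
from modules.storage import StorageManager

class DictConfig:
    # Minimal stand-in config: a flat dict keyed by dotted names (assumption for this sketch)
    def __init__(self, values):
        self._values = values

    def get(self, key, default=None):
        return self._values.get(key, default)

config = DictConfig({
    'database.type': 'sqlite',
    'database.sqlite_path': 'data/morele_parser.db',
})
storage = StorageManager(config)

storage.add_category('Laptops', 'https://example.com/laptops')
storage.save_product({
    'id': 'ext-123',                      # stored in the external_id column
    'url': 'https://example.com/p/123',
    'title': 'Sample laptop',
    'price': 3499.0,
    'availability': 'in_stock',
    'description': 'Demo item',
})
print(storage.get_product_by_url('https://example.com/p/123'))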