first commit

This commit is contained in:
2025-06-18 21:22:55 +03:00
commit ad4d215f04
22 changed files with 3762 additions and 0 deletions

93
utils/db_manager.py Normal file
View File

@@ -0,0 +1,93 @@
# utils/db_manager.py
"""
Утилиты для управления базой данных
"""
import sqlite3
import json
from pathlib import Path
from datetime import datetime, timedelta
class DatabaseManager:
    """Maintenance helpers for the parser's SQLite database.

    Provides file-level backups, age-based cleanup of log/cache tables,
    VACUUM/ANALYZE optimization, and summary statistics.
    """

    def __init__(self, db_path):
        # Path to the SQLite database file (str or pathlib.Path).
        self.db_path = db_path

    def backup_database(self, backup_dir='backups'):
        """Copy the database file into *backup_dir* with a timestamped name.

        Returns the Path of the backup file that was written.
        """
        import shutil

        backup_path = Path(backup_dir)
        # parents=True so a nested backup dir (e.g. 'var/backups') works too.
        backup_path.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_file = backup_path / f"morele_parser_backup_{timestamp}.db"

        # copy2 preserves file metadata (mtime etc.) along with the contents.
        shutil.copy2(self.db_path, backup_file)
        print(f"Backup created: {backup_file}")
        return backup_file

    def cleanup_old_data(self, days=30):
        """Delete parsing logs and translation-cache rows older than *days*.

        Returns a ``(deleted_logs, deleted_cache)`` tuple with the number of
        rows removed from each table.
        """
        cutoff_date = datetime.now() - timedelta(days=days)

        # The connection context manager commits on success, rolls back on error.
        with sqlite3.connect(self.db_path) as conn:
            # Drop parsing logs that finished before the cutoff.
            cursor = conn.execute("""
                DELETE FROM parsing_logs
                WHERE completed_at < ?
            """, (cutoff_date.isoformat(),))
            deleted_logs = cursor.rowcount

            # Drop translation-cache entries older than the cutoff.
            cursor = conn.execute("""
                DELETE FROM translation_cache
                WHERE created_at < ?
            """, (cutoff_date.isoformat(),))
            deleted_cache = cursor.rowcount

        print(f"Deleted {deleted_logs} old parsing logs")
        print(f"Deleted {deleted_cache} old translation cache entries")
        return deleted_logs, deleted_cache

    def optimize_database(self):
        """Compact the database (VACUUM) and refresh planner stats (ANALYZE)."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("VACUUM")
            conn.execute("ANALYZE")
        print("Database optimized")

    def get_database_stats(self):
        """Return a dict of row counts and the database file size in MB."""
        with sqlite3.connect(self.db_path) as conn:
            stats = {}

            # Product counts: total, active, translated.
            cursor = conn.execute("SELECT COUNT(*) FROM products")
            stats['total_products'] = cursor.fetchone()[0]

            cursor = conn.execute("SELECT COUNT(*) FROM products WHERE is_active = 1")
            stats['active_products'] = cursor.fetchone()[0]

            cursor = conn.execute("SELECT COUNT(*) FROM products WHERE is_translated = 1")
            stats['translated_products'] = cursor.fetchone()[0]

            # Active categories.
            cursor = conn.execute("SELECT COUNT(*) FROM categories WHERE is_active = 1")
            stats['active_categories'] = cursor.fetchone()[0]

            # Translation cache size (row count, not bytes).
            cursor = conn.execute("SELECT COUNT(*) FROM translation_cache")
            stats['translation_cache_size'] = cursor.fetchone()[0]

        # Database file size on disk, rounded to two decimals.
        db_file = Path(self.db_path)
        if db_file.exists():
            stats['db_size_mb'] = round(db_file.stat().st_size / 1024 / 1024, 2)

        return stats

131
utils/feed_validator.py Normal file
View File

@@ -0,0 +1,131 @@
# utils/feed_validator.py
"""
Валидатор YML фида
"""
import xml.etree.ElementTree as ET
from pathlib import Path
class FeedValidator:
    """YML feed validator for Prom.ua.

    Collects problems into ``self.errors`` (feed is rejected) and
    ``self.warnings`` (feed is accepted but flagged).
    """

    def __init__(self):
        self.errors = []
        self.warnings = []

    def validate_feed(self, feed_path):
        """Validate the YML feed at *feed_path*.

        Returns True when no errors were found (warnings alone do not fail
        validation). Parse/IO failures are recorded as errors.
        """
        self.errors = []
        self.warnings = []

        try:
            tree = ET.parse(feed_path)
            root = tree.getroot()

            # Check the top-level document structure first.
            self._validate_structure(root)

            # Validate offers, if present anywhere in the document.
            offers = root.find('.//offers')
            if offers is not None:
                self._validate_offers(offers)

            # Validate categories, if present.
            categories = root.find('.//categories')
            if categories is not None:
                self._validate_categories(categories)

            return len(self.errors) == 0

        except ET.ParseError as e:
            self.errors.append(f"XML parsing error: {e}")
            return False
        except Exception as e:
            self.errors.append(f"Validation error: {e}")
            return False

    def _validate_structure(self, root):
        """Check the root element and the required children of <shop>."""
        if root.tag != 'yml_catalog':
            self.errors.append("Root element must be 'yml_catalog'")

        shop = root.find('shop')
        if shop is None:
            self.errors.append("Missing 'shop' element")
            return

        required_elements = ['name', 'company', 'currencies', 'categories', 'offers']
        for element in required_elements:
            if shop.find(element) is None:
                self.errors.append(f"Missing required element: {element}")

    def _validate_offers(self, offers):
        """Check each <offer>: id attribute, required fields, price, images."""
        offer_count = 0
        for offer in offers.findall('offer'):
            offer_count += 1

            offer_id = offer.get('id')
            if not offer_id:
                self.errors.append(f"Offer {offer_count} missing id attribute")

            # Required child elements.
            required_fields = ['name', 'price', 'currencyId']
            for field in required_fields:
                if offer.find(field) is None:
                    self.errors.append(f"Offer {offer_id} missing required field: {field}")

            # Price must be a positive number. An empty <price> element has
            # text=None, and float(None) raises TypeError — catch it alongside
            # ValueError so it is reported as a non-numeric price instead of
            # aborting the whole validation run.
            price_elem = offer.find('price')
            if price_elem is not None:
                try:
                    price = float(price_elem.text)
                except (TypeError, ValueError):
                    self.errors.append(f"Offer {offer_id} has non-numeric price")
                else:
                    if price <= 0:
                        self.errors.append(f"Offer {offer_id} has invalid price: {price}")

            # Missing images are only a warning.
            pictures = offer.findall('picture')
            if not pictures:
                self.warnings.append(f"Offer {offer_id} has no images")

    def _validate_categories(self, categories):
        """Check each <category>: id presence, uniqueness, non-empty name."""
        category_ids = set()
        for category in categories.findall('category'):
            cat_id = category.get('id')
            if not cat_id:
                self.errors.append("Category missing id attribute")
                continue

            if cat_id in category_ids:
                self.errors.append(f"Duplicate category id: {cat_id}")
            category_ids.add(cat_id)

            if not category.text or not category.text.strip():
                self.errors.append(f"Category {cat_id} has empty name")

    def get_report(self):
        """Return a human-readable report of the last validation run."""
        report = []

        if self.errors:
            report.append("ERRORS:")
            for error in self.errors:
                report.append(f"  - {error}")

        if self.warnings:
            report.append("WARNINGS:")
            for warning in self.warnings:
                report.append(f"  - {warning}")

        if not self.errors and not self.warnings:
            report.append("Feed is valid!")

        return '\n'.join(report)

66
utils/image_optimizer.py Normal file
View File

@@ -0,0 +1,66 @@
# utils/image_optimizer.py
"""
Утилиты для оптимизации изображений
"""
from PIL import Image
import os
from pathlib import Path
import logging
class ImageOptimizer:
    """Re-encode images as optimized JPEGs, optionally downscaling them.

    NOTE(review): files are always written as JPEG data even when the output
    path keeps a .png/.webp suffix — confirm downstream consumers expect that.
    """

    def __init__(self, quality=85, max_size=(1200, 1200)):
        self.quality = quality          # JPEG quality (0-100)
        self.max_size = max_size        # (width, height) ceiling for output
        self.logger = logging.getLogger(__name__)

    def optimize_image(self, image_path, output_path=None):
        """Optimize a single image; write to *output_path* (default: in place).

        Returns True on success, False on any failure (failure is logged).
        """
        destination = image_path if output_path is None else output_path

        try:
            with Image.open(image_path) as img:
                # Transparency-capable modes must be flattened onto a white
                # RGB canvas before JPEG encoding.
                if img.mode in ('RGBA', 'LA', 'P'):
                    canvas = Image.new('RGB', img.size, (255, 255, 255))
                    if img.mode == 'P':
                        img = img.convert('RGBA')
                    if img.mode in ('RGBA', 'LA'):
                        # The last band is the alpha channel; use it as the
                        # paste mask so transparent areas become white.
                        canvas.paste(img, mask=img.split()[-1])
                    img = canvas

                # Downscale (preserving aspect ratio) only when either
                # dimension exceeds the configured maximum.
                exceeds = any(dim > limit for dim, limit in zip(img.size, self.max_size))
                if exceeds:
                    img.thumbnail(self.max_size, Image.Resampling.LANCZOS)

                img.save(destination, 'JPEG', quality=self.quality, optimize=True)
            return True
        except Exception as e:
            self.logger.error(f"Error optimizing image {image_path}: {e}")
            return False

    def optimize_directory(self, directory_path, extensions=None):
        """Optimize every matching image under *directory_path* (recursive).

        Returns an ``(optimized_count, error_count)`` tuple.
        """
        if extensions is None:
            extensions = ['.jpg', '.jpeg', '.png', '.webp']

        ok_count = 0
        fail_count = 0

        for candidate in Path(directory_path).rglob('*'):
            if candidate.suffix.lower() not in extensions:
                continue
            if self.optimize_image(candidate):
                ok_count += 1
            else:
                fail_count += 1

        print(f"Optimized {ok_count} images, {fail_count} errors")
        return ok_count, fail_count

60
utils/monitor.py Normal file
View File

@@ -0,0 +1,60 @@
# utils/monitor.py
"""
Система мониторинга парсера
"""
import psutil
import time
import json
from datetime import datetime
from pathlib import Path
class SystemMonitor:
    """System-resource monitor: snapshots CPU/memory/disk/network stats
    and appends them as JSON lines to a log file."""

    def __init__(self, log_file='logs/monitoring.log'):
        self.log_file = Path(log_file)
        # Make sure the log directory exists before the first write.
        self.log_file.parent.mkdir(exist_ok=True)

    def get_system_stats(self):
        """Return a dict snapshot of current system resource usage."""
        snapshot = {'timestamp': datetime.now().isoformat()}
        # cpu_percent blocks for 1s to measure an interval, not an instant.
        snapshot['cpu_percent'] = psutil.cpu_percent(interval=1)
        snapshot['memory_percent'] = psutil.virtual_memory().percent
        snapshot['disk_usage'] = psutil.disk_usage('/').percent
        # net_io_counters may be absent on some platforms/builds of psutil.
        if hasattr(psutil, 'net_io_counters'):
            snapshot['network_io'] = dict(psutil.net_io_counters()._asdict())
        else:
            snapshot['network_io'] = {}
        snapshot['process_count'] = len(psutil.pids())
        return snapshot

    def log_stats(self):
        """Append the current snapshot to the log file as one JSON line."""
        snapshot = self.get_system_stats()
        with open(self.log_file, 'a', encoding='utf-8') as log:
            log.write(json.dumps(snapshot, ensure_ascii=False) + '\n')

    def check_disk_space(self, warning_threshold=80, critical_threshold=90):
        """Classify root-filesystem usage as 'ok' / 'warning' / 'critical'.

        Returns a ``(level, message)`` tuple; the message is user-facing.
        """
        usage = psutil.disk_usage('/').percent
        if usage < warning_threshold:
            return 'ok', f"Место на диске: {usage}%"
        if usage < critical_threshold:
            return 'warning', f"Мало места на диске: {usage}%"
        return 'critical', f"Критически мало места на диске: {usage}%"

    def check_memory_usage(self, warning_threshold=80, critical_threshold=90):
        """Classify virtual-memory usage as 'ok' / 'warning' / 'critical'.

        Returns a ``(level, message)`` tuple; the message is user-facing.
        """
        usage = psutil.virtual_memory().percent
        if usage < warning_threshold:
            return 'ok', f"Использование памяти: {usage}%"
        if usage < critical_threshold:
            return 'warning', f"Высокое использование памяти: {usage}%"
        return 'critical', f"Критически высокое использование памяти: {usage}%"