first commit
This commit is contained in:
93
utils/db_manager.py
Normal file
93
utils/db_manager.py
Normal file
@@ -0,0 +1,93 @@
|
||||
# utils/db_manager.py
|
||||
"""
|
||||
Утилиты для управления базой данных
|
||||
"""
|
||||
|
||||
import sqlite3
|
||||
import json
|
||||
from pathlib import Path
|
||||
from datetime import datetime, timedelta
|
||||
|
||||
|
||||
class DatabaseManager:
    """Manage the parser's SQLite database: backups, cleanup, optimization, stats."""

    def __init__(self, db_path):
        # Path to the SQLite database file (str or Path).
        self.db_path = db_path

    def backup_database(self, backup_dir='backups'):
        """Create a timestamped copy of the database file.

        Args:
            backup_dir: Directory to place the backup in; created
                (including missing parents) if it does not exist.

        Returns:
            Path of the backup file that was written.
        """
        backup_path = Path(backup_dir)
        # parents=True so nested backup directories (e.g. 'var/backups') work.
        backup_path.mkdir(parents=True, exist_ok=True)

        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        backup_file = backup_path / f"morele_parser_backup_{timestamp}.db"

        # copy2 preserves file metadata (mtime etc.) along with the contents.
        import shutil
        shutil.copy2(self.db_path, backup_file)

        print(f"Backup created: {backup_file}")
        return backup_file

    def cleanup_old_data(self, days=30):
        """Delete parsing logs and translation-cache rows older than *days*.

        Args:
            days: Age threshold in days; rows whose timestamp is older
                than now - days are removed.
        """
        cutoff_date = datetime.now() - timedelta(days=days)

        with sqlite3.connect(self.db_path) as conn:
            # Drop old parsing logs.
            cursor = conn.execute("""
                DELETE FROM parsing_logs
                WHERE completed_at < ?
            """, (cutoff_date.isoformat(),))

            deleted_logs = cursor.rowcount

            # Drop translation-cache entries older than the cutoff.
            cursor = conn.execute("""
                DELETE FROM translation_cache
                WHERE created_at < ?
            """, (cutoff_date.isoformat(),))

            deleted_cache = cursor.rowcount

            print(f"Deleted {deleted_logs} old parsing logs")
            print(f"Deleted {deleted_cache} old translation cache entries")

    def optimize_database(self):
        """Compact the database file (VACUUM) and refresh planner stats (ANALYZE)."""
        with sqlite3.connect(self.db_path) as conn:
            conn.execute("VACUUM")
            conn.execute("ANALYZE")

        print("Database optimized")

    def get_database_stats(self):
        """Collect summary statistics about the database.

        Returns:
            dict with product counts (total/active/translated), active
            category count, translation-cache size, and — if the file
            exists — the database file size in megabytes ('db_size_mb').
        """
        with sqlite3.connect(self.db_path) as conn:
            stats = {}

            # Product counts.
            cursor = conn.execute("SELECT COUNT(*) FROM products")
            stats['total_products'] = cursor.fetchone()[0]

            cursor = conn.execute("SELECT COUNT(*) FROM products WHERE is_active = 1")
            stats['active_products'] = cursor.fetchone()[0]

            cursor = conn.execute("SELECT COUNT(*) FROM products WHERE is_translated = 1")
            stats['translated_products'] = cursor.fetchone()[0]

            # Category count.
            cursor = conn.execute("SELECT COUNT(*) FROM categories WHERE is_active = 1")
            stats['active_categories'] = cursor.fetchone()[0]

            # Translation cache size.
            cursor = conn.execute("SELECT COUNT(*) FROM translation_cache")
            stats['translation_cache_size'] = cursor.fetchone()[0]

            # Database file size on disk.
            db_file = Path(self.db_path)
            if db_file.exists():
                stats['db_size_mb'] = round(db_file.stat().st_size / 1024 / 1024, 2)

            return stats
|
||||
131
utils/feed_validator.py
Normal file
131
utils/feed_validator.py
Normal file
@@ -0,0 +1,131 @@
|
||||
# utils/feed_validator.py
|
||||
"""
|
||||
Валидатор YML фида
|
||||
"""
|
||||
|
||||
import xml.etree.ElementTree as ET
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class FeedValidator:
    """Validator for Prom.ua YML feeds."""

    def __init__(self):
        # Findings accumulated per run; both lists are reset by validate_feed().
        self.errors = []
        self.warnings = []

    def validate_feed(self, feed_path):
        """Validate a YML feed.

        Args:
            feed_path: Path or file object of the feed XML.

        Returns:
            True when no errors were recorded (warnings are allowed),
            otherwise False. Details are available via get_report().
        """
        self.errors = []
        self.warnings = []

        try:
            tree = ET.parse(feed_path)
            root = tree.getroot()

            # Top-level document structure.
            self._validate_structure(root)

            # Offers section.
            offers = root.find('.//offers')
            if offers is not None:
                self._validate_offers(offers)

            # Categories section.
            categories = root.find('.//categories')
            if categories is not None:
                self._validate_categories(categories)

            return len(self.errors) == 0

        except ET.ParseError as e:
            self.errors.append(f"XML parsing error: {e}")
            return False
        except Exception as e:
            # Safety net: record unexpected failures instead of raising.
            self.errors.append(f"Validation error: {e}")
            return False

    def _validate_structure(self, root):
        """Check the root tag and the mandatory <shop> sub-elements."""
        if root.tag != 'yml_catalog':
            self.errors.append("Root element must be 'yml_catalog'")

        shop = root.find('shop')
        if shop is None:
            self.errors.append("Missing 'shop' element")
            return

        required_elements = ['name', 'company', 'currencies', 'categories', 'offers']
        for element in required_elements:
            if shop.find(element) is None:
                self.errors.append(f"Missing required element: {element}")

    def _validate_offers(self, offers):
        """Check every <offer>: id attribute, required fields, price, images."""
        offer_count = 0

        for offer in offers.findall('offer'):
            offer_count += 1
            offer_id = offer.get('id')

            if not offer_id:
                self.errors.append(f"Offer {offer_count} missing id attribute")

            # Required child elements.
            required_fields = ['name', 'price', 'currencyId']
            for field in required_fields:
                if offer.find(field) is None:
                    self.errors.append(f"Offer {offer_id} missing required field: {field}")

            # Price must be a positive number.
            price_elem = offer.find('price')
            if price_elem is not None:
                try:
                    # float(None) raises TypeError for an empty <price/>;
                    # catch it alongside ValueError so one bad offer is
                    # reported instead of aborting the whole validation run.
                    price = float(price_elem.text)
                    if price <= 0:
                        self.errors.append(f"Offer {offer_id} has invalid price: {price}")
                except (ValueError, TypeError):
                    self.errors.append(f"Offer {offer_id} has non-numeric price")

            # Images are recommended but not mandatory.
            pictures = offer.findall('picture')
            if not pictures:
                self.warnings.append(f"Offer {offer_id} has no images")

    def _validate_categories(self, categories):
        """Check category ids are present and unique, and names non-empty."""
        category_ids = set()

        for category in categories.findall('category'):
            cat_id = category.get('id')

            if not cat_id:
                self.errors.append("Category missing id attribute")
                continue

            if cat_id in category_ids:
                self.errors.append(f"Duplicate category id: {cat_id}")

            category_ids.add(cat_id)

            if not category.text or not category.text.strip():
                self.errors.append(f"Category {cat_id} has empty name")

    def get_report(self):
        """Return a human-readable validation report as a single string."""
        report = []

        if self.errors:
            report.append("ERRORS:")
            for error in self.errors:
                report.append(f"  - {error}")

        if self.warnings:
            report.append("WARNINGS:")
            for warning in self.warnings:
                report.append(f"  - {warning}")

        if not self.errors and not self.warnings:
            report.append("Feed is valid!")

        return '\n'.join(report)
|
||||
66
utils/image_optimizer.py
Normal file
66
utils/image_optimizer.py
Normal file
@@ -0,0 +1,66 @@
|
||||
# utils/image_optimizer.py
|
||||
"""
|
||||
Утилиты для оптимизации изображений
|
||||
"""
|
||||
|
||||
from PIL import Image
|
||||
import os
|
||||
from pathlib import Path
|
||||
import logging
|
||||
|
||||
|
||||
class ImageOptimizer:
    """Recompress images as optimized JPEGs, bounded to a maximum size."""

    def __init__(self, quality=85, max_size=(1200, 1200)):
        # JPEG quality setting and maximum (width, height) in pixels.
        self.quality = quality
        self.max_size = max_size
        self.logger = logging.getLogger(__name__)

    def optimize_image(self, image_path, output_path=None):
        """Optimize a single image in place (or into *output_path*).

        Returns True on success, False if the image could not be processed.
        """
        target = image_path if output_path is None else output_path

        try:
            with Image.open(image_path) as img:
                # JPEG has no alpha channel: flatten any transparency
                # onto a white background first.
                if img.mode in ('RGBA', 'LA', 'P'):
                    canvas = Image.new('RGB', img.size, (255, 255, 255))
                    if img.mode == 'P':
                        img = img.convert('RGBA')
                    if img.mode in ('RGBA', 'LA'):
                        canvas.paste(img, mask=img.split()[-1])
                    img = canvas

                # Downscale (aspect ratio preserved) when either
                # dimension exceeds the configured maximum.
                max_w, max_h = self.max_size
                if img.size[0] > max_w or img.size[1] > max_h:
                    img.thumbnail(self.max_size, Image.Resampling.LANCZOS)

                img.save(target, 'JPEG', quality=self.quality, optimize=True)

                return True

        except Exception as e:
            self.logger.error(f"Error optimizing image {image_path}: {e}")
            return False

    def optimize_directory(self, directory_path, extensions=None):
        """Optimize every matching image under *directory_path* recursively.

        Returns a (optimized_count, error_count) tuple.
        """
        allowed = ['.jpg', '.jpeg', '.png', '.webp'] if extensions is None else extensions

        root = Path(directory_path)
        optimized_count = 0
        error_count = 0

        for candidate in root.rglob('*'):
            if candidate.suffix.lower() not in allowed:
                continue
            if self.optimize_image(candidate):
                optimized_count += 1
            else:
                error_count += 1

        print(f"Optimized {optimized_count} images, {error_count} errors")
        return optimized_count, error_count
|
||||
60
utils/monitor.py
Normal file
60
utils/monitor.py
Normal file
@@ -0,0 +1,60 @@
|
||||
# utils/monitor.py
|
||||
"""
|
||||
Система мониторинга парсера
|
||||
"""
|
||||
|
||||
import psutil
|
||||
import time
|
||||
import json
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class SystemMonitor:
    """Monitor system resources (CPU, memory, disk, network) via psutil."""

    def __init__(self, log_file='logs/monitoring.log'):
        """Create the monitor and ensure the log directory exists.

        Args:
            log_file: Path of the JSON-lines monitoring log.
        """
        self.log_file = Path(log_file)
        # parents=True so nested log directories (e.g. 'var/logs') work too.
        self.log_file.parent.mkdir(parents=True, exist_ok=True)

    def get_system_stats(self):
        """Collect a snapshot of system resource usage.

        Returns:
            dict with ISO timestamp, CPU/memory/disk usage percentages,
            network I/O counters (empty dict if unavailable) and the
            current process count.

        Note: psutil.cpu_percent(interval=1) blocks ~1 second to sample.
        """
        stats = {
            'timestamp': datetime.now().isoformat(),
            'cpu_percent': psutil.cpu_percent(interval=1),
            'memory_percent': psutil.virtual_memory().percent,
            'disk_usage': psutil.disk_usage('/').percent,
            'network_io': dict(psutil.net_io_counters()._asdict()) if hasattr(psutil, 'net_io_counters') else {},
            'process_count': len(psutil.pids())
        }

        return stats

    def log_stats(self):
        """Append the current stats as a single JSON line to the log file."""
        stats = self.get_system_stats()

        with open(self.log_file, 'a', encoding='utf-8') as f:
            f.write(json.dumps(stats, ensure_ascii=False) + '\n')

    def check_disk_space(self, warning_threshold=80, critical_threshold=90, path='/'):
        """Check disk usage for *path* against the given thresholds.

        Args:
            warning_threshold: Usage %% at which to warn.
            critical_threshold: Usage %% considered critical.
            path: Mount point to check (default '/', backward-compatible).

        Returns:
            ('ok' | 'warning' | 'critical', human-readable message) tuple.
        """
        disk_usage = psutil.disk_usage(path).percent

        if disk_usage >= critical_threshold:
            return 'critical', f"Критически мало места на диске: {disk_usage}%"
        elif disk_usage >= warning_threshold:
            return 'warning', f"Мало места на диске: {disk_usage}%"
        else:
            return 'ok', f"Место на диске: {disk_usage}%"

    def check_memory_usage(self, warning_threshold=80, critical_threshold=90):
        """Check RAM usage against the given thresholds.

        Returns:
            ('ok' | 'warning' | 'critical', human-readable message) tuple.
        """
        memory_usage = psutil.virtual_memory().percent

        if memory_usage >= critical_threshold:
            return 'critical', f"Критически высокое использование памяти: {memory_usage}%"
        elif memory_usage >= warning_threshold:
            return 'warning', f"Высокое использование памяти: {memory_usage}%"
        else:
            return 'ok', f"Использование памяти: {memory_usage}%"
|
||||
Reference in New Issue
Block a user