Files
mario_scraper/translator.py

96 lines
3.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import copy
import time
from typing import Any, Dict, List, Optional

from deep_translator import GoogleTranslator

from translation_cache import TranslationCache
class ProductTranslator:
    """Translate scraped product dictionaries from Polish ("pl") to Ukrainian ("uk").

    Every string is looked up in a ``TranslationCache`` before the
    ``GoogleTranslator`` backend is called, and fresh translations are written
    back to the cache.
    """

    # Default pause after each live translation request, in seconds. Kept as a
    # class attribute so instances that skip ``__init__`` still have a value.
    request_delay: float = 0.5

    # Lower-cased fragments of error messages that indicate rate limiting.
    _RATE_LIMIT_MARKERS = (
        "too many requests",
        "you are allowed to make",
        "5 requests per second",
    )

    def __init__(self, request_delay: float = 0.5):
        """Create the translation backend and the translation cache.

        Args:
            request_delay: Seconds to sleep after every live translation
                request to reduce the chance of being blocked.
        """
        self.translator = GoogleTranslator(source="pl", target="uk")
        self.cache = TranslationCache()
        self.request_delay = request_delay

    def translate_text(self, text: str) -> str:
        """Translate one string, using the cache when possible.

        Args:
            text: The string to translate. Empty or non-string values are
                returned unchanged.

        Returns:
            The cached or freshly translated string. ``text`` itself is
            returned when the backend yields nothing usable or fails with an
            error that is not a rate-limit error.

        Raises:
            Exception: Re-raises the backend error when its message contains
                one of the rate-limit markers, so the caller can stop.
        """
        if not text or not isinstance(text, str):
            return text

        # Check the cache before hitting the translation service.
        cached = self.cache.get(text)
        if cached:
            return cached

        try:
            translated = self.translator.translate(text)
            # Pause after each live request to avoid being blocked.
            if self.request_delay > 0:
                time.sleep(self.request_delay)
            if not translated or not isinstance(translated, str):
                # Nothing usable came back; fall back to the original so
                # callers always receive a string for string input.
                return text
            if translated != text:
                self.cache.add(text, translated)
                self.cache.save_cache()  # persist the cache after each new entry
            return translated
        except Exception as e:
            error_text = str(e).lower()
            print(f"[ERROR] Ошибка перевода: {e}")
            if any(marker in error_text for marker in self._RATE_LIMIT_MARKERS):
                # Rate-limit errors must reach the caller; bare ``raise``
                # keeps the original traceback intact.
                raise
            return text

    def translate_list(self, items: List[str]) -> List[str]:
        """Translate every string in ``items`` and return the results in order."""
        return [self.translate_text(item) for item in items]

    def is_translated(self, original: str, translated: str) -> bool:
        """Report whether ``translated`` differs from ``original``.

        Strings are compared after stripping surrounding whitespace. Non-string
        values (which ``translate_text`` passes through unchanged) are compared
        directly instead of raising on ``.strip()``.
        """
        if not isinstance(original, str) or not isinstance(translated, str):
            return original != translated
        return original.strip() != translated.strip()

    def translate_product(self, product: Dict[str, Any]) -> Optional[Dict[str, Any]]:
        """Return a translated copy of ``product``.

        The product's ``"name"`` is translated, along with the ``"name"`` and
        ``"value"`` list of every entry under ``"attributes"`` and the
        ``"title"`` and ``"text"`` of every entry under ``"description"``,
        when those keys are present.

        Args:
            product: Product dictionary; must contain a ``"name"`` key. It is
                not modified.

        Returns:
            A deep copy of ``product`` with translated fields, or ``None`` when
            no field changed.

        Raises:
            KeyError: If ``"name"`` or a required nested key is missing.
            Exception: Rate-limit errors propagated from ``translate_text``.
        """
        # Deep copy: nested attribute/description dicts are edited below and
        # must not alias the caller's data.
        translated = copy.deepcopy(product)
        any_changes = False

        # Name
        translated_name = self.translate_text(product["name"])
        if self.is_translated(product["name"], translated_name):
            translated["name"] = translated_name
            any_changes = True
        else:
            print(f"[SKIP] Название не переведено: {product['name']}")

        # Attributes (a key present with a None value is treated as empty)
        for attr in translated.get("attributes") or []:
            name_trans = self.translate_text(attr["name"])
            if self.is_translated(attr["name"], name_trans):
                attr["name"] = name_trans
                any_changes = True
            values_trans = self.translate_list(attr["value"])
            if values_trans != attr["value"]:
                attr["value"] = values_trans
                any_changes = True

        # Description sections (a key present with a None value is treated as empty)
        for section in translated.get("description") or []:
            title_trans = self.translate_text(section["title"])
            text_trans = self.translate_text(section["text"])
            if self.is_translated(section["title"], title_trans):
                section["title"] = title_trans
                any_changes = True
            if self.is_translated(section["text"], text_trans):
                section["text"] = text_trans
                any_changes = True

        if any_changes:
            return translated

        print(f"[SKIP] Товар не содержит переведенных полей: {product['name']}")
        return None