# Source listing metadata: 251 lines, 8.6 KiB, Python.
import json
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import Dict, List, Optional
from urllib.parse import urljoin

from config import BASE_URL


class RobotVacuumYMLGenerator:
    """Build a ``yml_catalog`` XML product feed in memory and write it to disk.

    The tree has the shape ``yml_catalog > shop > (name, categories, offers)``.
    Categories are registered with :meth:`add_category`, offers with
    :meth:`add_offer`, and :meth:`generate_yml` adds a list of products and
    serializes the whole tree to a file.
    """

    # Value written into every offer's <currencyId>.
    CURRENCY_ID = "PLN"
    # Only the first MAX_PICTURES entries of a product's image list are considered.
    MAX_PICTURES = 10

    def __init__(
        self,
        shop_name: str = "Euro Electronics",
        base_url: str = BASE_URL,
        use_original_urls: bool = False,
        categories_data: Optional[List[Dict]] = None,
    ):
        """
        Initialize YML feed generator

        :param shop_name: Name of the shop, written to ``<shop><name>``
        :param base_url: Base URL that local image paths are resolved against
        :param use_original_urls: If True, use original image URLs instead of local ones
        :param categories_data: Optional list of category dictionaries with ``id``
            and ``name`` keys; used to prepend the category name to offer keywords
        """
        self.root = ET.Element(
            "yml_catalog", {"date": datetime.now().strftime("%Y-%m-%d %H:%M")}
        )
        self.shop = ET.SubElement(self.root, "shop")
        ET.SubElement(self.shop, "name").text = shop_name
        self.base_url = base_url
        self.use_original_urls = use_original_urls

        self.categories = ET.SubElement(self.shop, "categories")
        self.offers = ET.SubElement(self.shop, "offers")
        self.categories_data = categories_data or []

    def add_category(
        self,
        category_id: str,
        category_name: str,
        parent_id: Optional[str] = None,
    ):
        """
        Add category to YML feed

        :param category_id: Category ID
        :param category_name: Category name
        :param parent_id: Parent category ID (optional); falsy values are ignored
        """
        # ElementTree can only serialize string attribute values, so coerce
        # here instead of failing later when the tree is written.
        attrs = {"id": str(category_id)}
        if parent_id:
            attrs["parentId"] = str(parent_id)

        category = ET.SubElement(self.categories, "category", attrs)
        category.text = category_name

    def get_image_url(self, local_path: str) -> Optional[str]:
        """
        Convert local path to full URL, normalizing path separators

        :param local_path: Local path to image file
        :return: Full URL with forward slashes, or None when ``local_path`` is empty
        """
        if not local_path:
            return None

        # Windows-style separators are not valid in URL paths.
        normalized_path = local_path.replace("\\", "/")
        # NOTE: urljoin resolves the path like a relative link, so a base URL
        # without a trailing slash loses its last path segment.
        return urljoin(self.base_url, normalized_path)

    def process_attributes(self, attributes: List[Dict]) -> List[Dict]:
        """
        Convert attributes to param format for YML

        :param attributes: List of attribute dictionaries with ``name`` and
            ``value`` keys; None is treated as an empty list
        :return: List of ``{"name": ..., "value": ...}`` dictionaries, with list
            values flattened into one ``" | "``-separated string
        """
        params = []
        for attr in attributes or []:
            value = attr["value"]
            if isinstance(value, list):
                value = " | ".join(str(item) for item in value)
            params.append({"name": attr["name"], "value": value})
        return params

    @staticmethod
    def _has_latin(word: str) -> bool:
        """Return True if ``word`` contains a character that lowercases to a-z."""
        for char in word:
            lowered = char.lower()
            # Some characters lowercase to more than one code point (e.g.
            # U+0130), which made the previous ord()-based check raise TypeError.
            if len(lowered) == 1 and "a" <= lowered <= "z":
                return True
        return False

    def clean_product_name(self, name: str) -> str:
        """
        Clean a product name by dropping every word after the last word that
        contains Latin letters (e.g. a trailing Cyrillic description)

        Words are whitespace-separated, so the result is also whitespace-normalized.

        :param name: Original product name
        :return: Cleaned name; all words are kept when none contains a Latin letter
        """
        words = name.split()

        last_latin_index = -1
        for index, word in enumerate(words):
            if self._has_latin(word):
                last_latin_index = index

        if last_latin_index == -1:
            return " ".join(words)
        return " ".join(words[: last_latin_index + 1])

    def _category_name(self, category_id) -> str:
        """Return the name of the first ``categories_data`` entry with this id, or ""."""
        wanted = str(category_id)
        for category in self.categories_data:
            if str(category["id"]) == wanted:
                return category["name"] or ""
        return ""

    def _resolve_image_url(self, image: Dict) -> Optional[str]:
        """Pick the original or the locally hosted URL of an image, per ``use_original_urls``."""
        if self.use_original_urls:
            return image.get("url") or None
        return self.get_image_url(image.get("local_path"))

    def _build_description_html(self, sections: List[Dict]) -> str:
        """Render description sections (title, text, optional image) as one HTML ``<div>``."""
        parts = ["<div>"]
        for section in sections:
            title = section["title"]
            parts.append(f"<h3>{title}</h3>")
            parts.append(f"<p>{section['text']}</p>")

            # A section may have no image at all.
            img_url = self._resolve_image_url(section.get("image") or {})
            if img_url:
                # A raw double quote would terminate the alt attribute early.
                alt_text = str(title).replace('"', "&quot;")
                parts.append(f'<img src="{img_url}" alt="{alt_text}"/>')
        parts.append("</div>")
        return "".join(parts)

    def add_offer(self, product: Dict):
        """
        Add a robot vacuum cleaner offer to the YML feed

        Required keys: ``plu``, ``name``, ``prices["mainPrice"]``,
        ``local_category_id``, ``portal_category_id`` and ``url``.
        Optional keys: ``in_stock``, ``keywords``, ``description``, ``images``
        and ``attributes``.

        :param product: Product dictionary from JSON
        :raises KeyError: If a required key is missing; the feed is left unchanged
        """
        in_stock = product.get("in_stock", False)
        stock_flag = "true" if in_stock else "false"

        # Build the offer detached from the tree so that a failure halfway
        # through does not leave a partial <offer> in the feed.
        offer = ET.Element(
            "offer",
            {
                "id": str(product["plu"]),
                "available": stock_flag,
                "in_stock": stock_flag,
            },
        )

        ET.SubElement(offer, "name").text = self.clean_product_name(product["name"])

        # The PLU doubles as the vendor code.
        ET.SubElement(offer, "vendorCode").text = str(product["plu"])

        ET.SubElement(offer, "price").text = str(product["prices"]["mainPrice"])
        ET.SubElement(offer, "currencyId").text = self.CURRENCY_ID
        # Shop-local category id.
        ET.SubElement(offer, "categoryId").text = str(product["local_category_id"])
        # Portal category id is mandatory.
        ET.SubElement(offer, "portal_category_id").text = str(
            product["portal_category_id"]
        )

        # Keywords: category name first, then the product's own keywords.
        # Empty parts are skipped so the result never starts with ", ".
        category_name = self._category_name(product.get("local_category_id"))
        keywords = product.get("keywords", "")
        keyword_parts = [str(part) for part in (category_name, keywords) if part]
        ET.SubElement(offer, "keywords").text = ", ".join(keyword_parts)

        # Description with images
        sections = product.get("description")
        if sections is not None:
            ET.SubElement(offer, "description").text = self._build_description_html(
                sections
            )

        # Product images; entries without a usable URL are skipped.
        for img in (product.get("images") or [])[: self.MAX_PICTURES]:
            img_url = self._resolve_image_url(img)
            if img_url:
                ET.SubElement(offer, "picture").text = img_url

        # Attributes as params
        for param in self.process_attributes(product.get("attributes")):
            param_elem = ET.SubElement(offer, "param", {"name": str(param["name"])})
            param_elem.text = str(param["value"])

        # URL
        ET.SubElement(offer, "url").text = product["url"]

        self.offers.append(offer)

    def generate_yml(self, products: List[Dict], output_yml: str) -> bool:
        """
        Generate YML feed from products data

        Offers are appended to the generator's tree, so calling this twice on
        the same instance accumulates offers.

        :param products: List of product dictionaries
        :param output_yml: Path to output YML file
        :return: True if successful, False otherwise (the error is printed)
        """
        try:
            # Ensure a category exists
            if len(self.categories) == 0:
                raise ValueError("No categories added to the YML feed.")

            # A path passed by mistake would otherwise be iterated per character.
            if isinstance(products, (str, bytes)):
                raise TypeError(
                    "products must be a list of product dictionaries, not a string"
                )

            for product in products:
                self.add_offer(product)

            tree = ET.ElementTree(self.root)
            tree.write(output_yml, encoding="UTF-8", xml_declaration=True)
            print(f"YML feed generated: {output_yml}")
            return True

        except Exception as e:
            # Boundary handler: callers rely on the boolean result.
            print(f"Error generating YML feed: {str(e)}")
            return False
def main():
    """
    Command line entry point: build a YML feed from JSON files

    Usage: ``python feed_generator.py input.json [output.yml] [categories.json]``

    ``input.json`` is assumed to hold a JSON array of product objects, and the
    optional ``categories.json`` a JSON array of objects with ``id`` and
    ``name`` keys (the keys the generator reads from ``categories_data``).
    When ``output.yml`` is omitted, the input path with a ``.yml`` extension
    is used. Exits with status 1 on bad usage, unreadable input or a failed
    generation.
    """
    import os
    import sys

    if len(sys.argv) < 2:
        print(
            "Usage: python feed_generator.py input.json [output.yml] [categories.json]"
        )
        sys.exit(1)

    input_json = sys.argv[1]
    if len(sys.argv) > 2:
        output_yml = sys.argv[2]
    else:
        output_yml = os.path.splitext(input_json)[0] + ".yml"
    categories_json = sys.argv[3] if len(sys.argv) > 3 else None

    try:
        # The generator expects parsed product dictionaries, not a file path.
        with open(input_json, encoding="utf-8") as source:
            products = json.load(source)

        categories_data = []
        if categories_json:
            with open(categories_json, encoding="utf-8") as source:
                categories_data = json.load(source)
    except (OSError, ValueError) as error:
        # json.JSONDecodeError is a ValueError subclass.
        print(f"Error reading input: {error}")
        sys.exit(1)

    generator = RobotVacuumYMLGenerator(categories_data=categories_data)
    # generate_yml refuses to run until at least one category is registered.
    for category in categories_data:
        generator.add_category(str(category["id"]), category["name"])

    if not generator.generate_yml(products, output_yml):
        sys.exit(1)


if __name__ == "__main__":
    main()