first commit

This commit is contained in:
2025-04-17 13:56:40 +03:00
commit e7e6382a10
10 changed files with 2608 additions and 0 deletions

220
feed_generator.py Normal file
View File

@@ -0,0 +1,220 @@
import html
import json
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import Dict, List, Optional
from urllib.parse import urljoin


class RobotVacuumYMLGenerator:
    """
    Build a ``yml_catalog`` XML feed (shop -> categories + offers) from
    product dictionaries and write it to disk.

    Typical use: create the generator, register at least one category with
    :meth:`add_category`, then call :meth:`generate_yml`.
    """

    # Only the first MAX_PICTURES entries of a product's image list are considered.
    MAX_PICTURES = 10

    def __init__(
        self,
        shop_name: str = "Euro Electronics",
        base_url: str = "https://mario.mrakells.pp.ua",
        use_original_urls: bool = False,
    ):
        """
        Initialize YML feed generator
        :param shop_name: Name of the shop
        :param base_url: Base URL for image hosting
        :param use_original_urls: If True, use original image URLs instead of local ones
        """
        self.root = ET.Element(
            "yml_catalog", {"date": datetime.now().strftime("%Y-%m-%d %H:%M")}
        )
        self.shop = ET.SubElement(self.root, "shop")
        ET.SubElement(self.shop, "name").text = shop_name
        self.base_url = base_url
        self.use_original_urls = use_original_urls
        self.categories = ET.SubElement(self.shop, "categories")
        self.offers = ET.SubElement(self.shop, "offers")

    def add_category(
        self, category_id: str, category_name: str, parent_id: Optional[str] = None
    ):
        """
        Add category to YML feed
        :param category_id: Category ID
        :param category_name: Category name
        :param parent_id: Parent category ID (optional; falsy values are ignored)
        """
        # ElementTree can only serialize string attribute values, so coerce
        # IDs that arrive as integers (common when they come from JSON).
        attrs = {"id": str(category_id)}
        if parent_id:
            attrs["parentId"] = str(parent_id)
        category = ET.SubElement(self.categories, "category", attrs)
        category.text = category_name

    def get_image_url(self, local_path: Optional[str]) -> Optional[str]:
        """
        Convert local path to full URL, normalizing path separators
        :param local_path: Local path to image file
        :return: Full URL with normalized path separators, or None when
                 ``local_path`` is empty/None
        """
        if not local_path:
            return None
        # Normalize Windows-style separators to forward slashes
        normalized_path = local_path.replace("\\", "/")
        return urljoin(self.base_url, normalized_path)

    def process_attributes(self, attributes: List[Dict]) -> List[Dict]:
        """
        Convert attributes to param format for YML
        :param attributes: List of attribute dictionaries with "name" and "value" keys
        :return: List of param dictionaries; list values are joined with " | "
        """
        params = []
        for attr in attributes:
            value = attr["value"]
            # Handle single or multiple values
            if isinstance(value, list):
                value = " | ".join(str(v) for v in value)
            params.append({"name": attr["name"], "value": value})
        return params

    @staticmethod
    def _contains_latin(word: str) -> bool:
        """Return True if ``word`` has a character whose lowercase form is a-z."""
        for char in word:
            lowered = char.lower()
            # Some characters lowercase to more than one code point
            # (e.g. U+0130); ord() on such a string raises TypeError, so
            # compare single-character results only.
            if len(lowered) == 1 and "a" <= lowered <= "z":
                return True
        return False

    def clean_product_name(self, name: str) -> str:
        """
        Clean a product name by dropping every word that follows the last
        word containing a Latin letter (i.e. the trailing Cyrillic words).
        Names without any Latin letters are returned with only their
        whitespace normalized.
        :param name: Original product name
        :return: Cleaned product name
        """
        words = name.split()
        # Index of the last word that contains a Latin letter (-1 if none)
        last_latin_index = -1
        for i, word in enumerate(words):
            if self._contains_latin(word):
                last_latin_index = i
        if last_latin_index != -1:
            words = words[: last_latin_index + 1]
        return " ".join(words)

    def _build_description_html(self, sections: List[Dict]) -> str:
        """Render description sections (title, text, optional image) as one HTML <div>."""
        parts = ["<div>"]
        for section in sections:
            title = section["title"]
            parts.append(f"<h3>{title}</h3>")
            parts.append(f"<p>{section['text']}</p>")
            # A section may have no image at all.
            image = section.get("image") or {}
            if self.use_original_urls:
                img_url = image.get("url")
            else:
                img_url = self.get_image_url(image.get("local_path"))
            if img_url:
                # Attribute values must be escaped, otherwise a quote in the
                # title or an ampersand in the URL produces broken markup.
                src = html.escape(str(img_url), quote=True)
                alt = html.escape(str(title), quote=True)
                parts.append(f'<img src="{src}" alt="{alt}"/>')
        parts.append("</div>")
        return "".join(parts)

    def _picture_urls(self, images: List[Dict]) -> List[str]:
        """Return picture URLs for the first MAX_PICTURES images, skipping ones without a usable URL."""
        urls = []
        for img in images[: self.MAX_PICTURES]:
            if self.use_original_urls:
                img_url = img.get("url")
            else:
                img_url = self.get_image_url(img.get("local_path"))
            if img_url:
                urls.append(img_url)
        return urls

    def add_offer(self, product: Dict):
        """
        Add a robot vacuum cleaner offer to the YML feed
        :param product: Product dictionary from JSON. Required keys: "plu",
                        "name", "prices" (with "mainPrice"),
                        "local_category_id", "portal_category_id", "url".
                        Optional keys: "in_stock", "description", "images",
                        "attributes".
        :raises KeyError: If a required key is missing
        """
        in_stock = product.get("in_stock", False)
        stock_flag = "true" if in_stock else "false"
        offer = ET.SubElement(
            self.offers,
            "offer",
            {
                "id": str(product["plu"]),
                "available": stock_flag,
                "in_stock": stock_flag,
            },
        )
        # Clean product name before adding to feed
        ET.SubElement(offer, "name").text = self.clean_product_name(product["name"])
        # The PLU doubles as the vendor code
        ET.SubElement(offer, "vendorCode").text = str(product["plu"])
        ET.SubElement(offer, "price").text = str(product["prices"]["mainPrice"])
        ET.SubElement(offer, "currencyId").text = "PLN"
        # Shop-local category (must match an id registered via add_category)
        ET.SubElement(offer, "categoryId").text = str(product["local_category_id"])
        # Portal category id is REQUIRED
        ET.SubElement(offer, "portal_category_id").text = str(
            product["portal_category_id"]
        )
        # Description with images
        if "description" in product:
            ET.SubElement(offer, "description").text = self._build_description_html(
                product["description"] or []
            )
        # Product images
        for img_url in self._picture_urls(product.get("images") or []):
            ET.SubElement(offer, "picture").text = img_url
        # Attributes as params
        for param in self.process_attributes(product.get("attributes") or []):
            param_elem = ET.SubElement(offer, "param", {"name": str(param["name"])})
            param_elem.text = str(param["value"])
        # URL
        ET.SubElement(offer, "url").text = product["url"]

    def generate_yml(self, products: List[Dict], output_yml: str) -> bool:
        """
        Generate YML feed from products data
        :param products: List of product dictionaries (already parsed, not a file path)
        :param output_yml: Path to output YML file
        :return: True if successful, False otherwise (the error is printed)
        """
        try:
            # Ensure a category exists
            if not list(self.categories):
                raise ValueError("No categories added to the YML feed.")
            # A bare string would otherwise be iterated character by character.
            if isinstance(products, (str, bytes)):
                raise TypeError(
                    "products must be a list of product dictionaries, not a file path."
                )
            # Add offers for each product
            for product in products:
                self.add_offer(product)
            # Write the XML tree
            tree = ET.ElementTree(self.root)
            tree.write(output_yml, encoding="UTF-8", xml_declaration=True)
            print(f"YML feed generated: {output_yml}")
            return True
        except Exception as e:
            # Top-level boundary: the contract is to report failure via the
            # return value rather than propagate.
            print(f"Error generating YML feed: {str(e)}")
            return False
def main():
    """
    Command-line entry point: read products from a JSON file and write a YML feed.

    Usage: ``python feed_generator.py input.json [output.yml]``

    When the output path is omitted, the feed is written next to the input
    file with a ``.yml`` extension. Exits with status 1 on bad usage,
    unreadable/invalid input, or a failed feed generation.
    """
    import sys
    from pathlib import Path

    if len(sys.argv) < 2:
        print("Usage: python feed_generator.py input.json [output.yml]")
        sys.exit(1)

    input_json = sys.argv[1]
    if len(sys.argv) > 2:
        output_yml = sys.argv[2]
    else:
        output_yml = str(Path(input_json).with_suffix(".yml"))

    # generate_yml expects parsed product dictionaries, not a file path.
    try:
        with open(input_json, encoding="utf-8") as fh:
            products = json.load(fh)
    except (OSError, json.JSONDecodeError) as exc:
        print(f"Error reading {input_json}: {exc}")
        sys.exit(1)

    if not isinstance(products, list):
        print(f"Error reading {input_json}: expected a JSON list of products")
        sys.exit(1)

    generator = RobotVacuumYMLGenerator()

    # generate_yml refuses to run without categories, so register every
    # distinct local category referenced by the products.
    # NOTE(review): no category names are visible in the input here, so the
    # ID doubles as the name - replace with real names if they are available.
    seen_categories = set()
    for product in products:
        if not isinstance(product, dict) or "local_category_id" not in product:
            continue
        category_id = str(product["local_category_id"])
        if category_id not in seen_categories:
            seen_categories.add(category_id)
            generator.add_category(category_id, category_id)

    if not generator.generate_yml(products, output_yml):
        sys.exit(1)
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()