first commit
This commit is contained in:
220
feed_generator.py
Normal file
220
feed_generator.py
Normal file
@@ -0,0 +1,220 @@
|
||||
import html
import json
import xml.etree.ElementTree as ET
from datetime import datetime
from typing import Dict, List, Optional
from urllib.parse import urljoin
|
||||
|
||||
|
||||
class RobotVacuumYMLGenerator:
    """
    Build a ``yml_catalog`` XML product feed for robot vacuum cleaners.

    The generator keeps one in-memory XML tree (``self.root``) containing a
    single ``shop`` element with ``categories`` and ``offers`` children.
    Categories are registered with :meth:`add_category`, products are turned
    into offers by :meth:`add_offer`, and :meth:`generate_yml` writes the tree.
    """

    def __init__(
        self,
        shop_name: str = "Euro Electronics",
        base_url: str = "https://mario.mrakells.pp.ua",
        use_original_urls: bool = False,
    ):
        """
        Initialize YML feed generator

        :param shop_name: Name of the shop
        :param base_url: Base URL for image hosting
        :param use_original_urls: If True, use original image URLs instead of local ones
        """
        self.root = ET.Element(
            "yml_catalog", {"date": datetime.now().strftime("%Y-%m-%d %H:%M")}
        )
        self.shop = ET.SubElement(self.root, "shop")
        ET.SubElement(self.shop, "name").text = shop_name
        self.base_url = base_url
        self.use_original_urls = use_original_urls

        self.categories = ET.SubElement(self.shop, "categories")
        self.offers = ET.SubElement(self.shop, "offers")

    def add_category(
        self, category_id: str, category_name: str, parent_id: Optional[str] = None
    ):
        """
        Add category to YML feed

        :param category_id: Category ID
        :param category_name: Category name
        :param parent_id: Parent category ID (optional)
        """
        # ElementTree can only serialize string attribute values, so ids that
        # arrive as integers (e.g. straight from JSON) are coerced here instead
        # of failing later when the tree is written.
        attrs = {"id": str(category_id)}
        if parent_id:
            attrs["parentId"] = str(parent_id)

        category = ET.SubElement(self.categories, "category", attrs)
        category.text = category_name

    def get_image_url(self, local_path: str) -> Optional[str]:
        """
        Convert local path to full URL, normalizing path separators

        :param local_path: Local path to image file
        :return: Full URL with normalized path separators, or None when
            ``local_path`` is empty
        """
        if not local_path:
            return None

        # Normalize path separators to forward slashes
        normalized_path = local_path.replace("\\", "/")
        return urljoin(self.base_url, normalized_path)

    def process_attributes(self, attributes: List[Dict]) -> List[Dict]:
        """
        Convert attributes to param format for YML

        :param attributes: List of attribute dictionaries with ``name`` and
            ``value`` keys
        :return: List of param dictionaries; list values are flattened into a
            single ``" | "``-separated string
        :raises KeyError: if an attribute lacks ``name`` or ``value``
        """
        params = []
        for attr in attributes:
            value = attr["value"]

            # Handle single or multiple values
            if isinstance(value, list):
                value = " | ".join(str(v) for v in value)

            params.append({"name": attr["name"], "value": value})
        return params

    def clean_product_name(self, name: str) -> str:
        """
        Clean a product name by dropping the words that follow the last word
        containing a Latin (ASCII) letter.

        Names without any Latin letter are returned unchanged apart from
        whitespace normalization (words are re-joined with single spaces).

        :param name: Original product name
        :return: Cleaned product name
        """
        words = name.split()
        last_latin_index = -1

        # Find the last word that contains an ASCII letter. The check is done
        # per character with str methods rather than ord(c.lower()): lower()
        # can return more than one code point (e.g. for "İ"), which makes
        # ord() raise TypeError.
        for i, word in enumerate(words):
            if any(c.isascii() and c.isalpha() for c in word):
                last_latin_index = i

        if last_latin_index == -1:
            return " ".join(words)
        return " ".join(words[: last_latin_index + 1])

    def _pick_image_url(self, image: Optional[Dict]) -> Optional[str]:
        """Return the URL to publish for ``image`` in the current URL mode, or None."""
        if not image:
            return None
        if self.use_original_urls:
            return image.get("url") or None
        return self.get_image_url(image.get("local_path"))

    def _build_description_html(self, sections: List[Dict]) -> str:
        """Render description sections (title, text, optional image) as one HTML ``<div>``."""
        parts = ["<div>"]
        for section in sections:
            title = section["title"]
            parts.append(f"<h3>{title}</h3>")
            parts.append(f"<p>{section['text']}</p>")

            # The image is optional; a section without one is still rendered.
            img_url = self._pick_image_url(section.get("image"))
            if img_url:
                # Attribute values are escaped so quotes or ampersands in the
                # title/URL cannot break out of the attribute.
                src = html.escape(str(img_url), quote=True)
                alt = html.escape(str(title), quote=True)
                parts.append(f'<img src="{src}" alt="{alt}"/>')
        parts.append("</div>")
        return "".join(parts)

    def add_offer(self, product: Dict):
        """
        Add a robot vacuum cleaner offer to the YML feed

        The offer is assembled detached from the tree and attached only once
        it is complete, so a product that fails validation leaves no
        half-built ``<offer>`` behind.

        :param product: Product dictionary from JSON. ``plu``, ``name``,
            ``prices.mainPrice``, ``local_category_id``, ``portal_category_id``
            and ``url`` are required; ``in_stock``, ``description``, ``images``
            and ``attributes`` are optional.
        :raises KeyError: if a required field is missing
        """
        availability = "true" if product.get("in_stock", False) else "false"
        offer = ET.Element(
            "offer",
            {
                "id": str(product["plu"]),
                "available": availability,
                "in_stock": availability,
            },
        )

        # Clean product name before adding to feed
        ET.SubElement(offer, "name").text = self.clean_product_name(product["name"])

        # Add vendorCode using plu
        ET.SubElement(offer, "vendorCode").text = str(product["plu"])

        ET.SubElement(offer, "price").text = str(product["prices"]["mainPrice"])
        ET.SubElement(offer, "currencyId").text = "PLN"
        # Local (shop-side) category of the product
        ET.SubElement(offer, "categoryId").text = str(product["local_category_id"])
        # REQUIRED
        ET.SubElement(offer, "portal_category_id").text = str(
            product["portal_category_id"]
        )

        # Description with images
        description = product.get("description")
        if description is not None:
            ET.SubElement(offer, "description").text = self._build_description_html(
                description
            )

        # Product images (at most the first ten); images that have no URL for
        # the current mode are skipped.
        for img in (product.get("images") or [])[:10]:
            img_url = self._pick_image_url(img)
            if img_url:
                ET.SubElement(offer, "picture").text = img_url

        # Attributes as params
        for param in self.process_attributes(product.get("attributes") or []):
            param_elem = ET.SubElement(offer, "param", {"name": str(param["name"])})
            param_elem.text = str(param["value"])

        # URL
        ET.SubElement(offer, "url").text = product["url"]

        self.offers.append(offer)

    def generate_yml(self, products: List[Dict], output_yml: str) -> bool:
        """
        Generate YML feed from products data

        Errors are reported on stdout and signalled through the return value
        rather than raised.

        :param products: List of product dictionaries
        :param output_yml: Path to output YML file
        :return: True if successful, False otherwise
        """
        try:
            # Ensure a category exists
            if len(self.categories) == 0:
                raise ValueError("No categories added to the YML feed.")
            if not output_yml:
                raise ValueError("No output path given for the YML feed.")

            # Add offers for each product
            for product in products:
                self.add_offer(product)

            # Write the XML tree
            tree = ET.ElementTree(self.root)
            tree.write(output_yml, encoding="UTF-8", xml_declaration=True)
            print(f"YML feed generated: {output_yml}")
            return True

        except Exception as e:
            # Deliberate catch-all boundary: callers rely on the boolean
            # result instead of handling exceptions.
            print(f"Error generating YML feed: {str(e)}")
            return False
|
||||
|
||||
|
||||
def main():
    """
    Command-line entry point: build a YML feed from a JSON products file.

    Usage: ``python feed_generator.py input.json [output.yml]``

    The input file must contain a JSON list of product dictionaries. When no
    output path is given, the feed is written next to the input file with a
    ``.yml`` suffix. The process exits with status 1 on a usage error, when
    the input cannot be read or parsed, or when feed generation fails.
    """
    import sys
    from pathlib import Path

    if len(sys.argv) < 2:
        print("Usage: python feed_generator.py input.json [output.yml]")
        sys.exit(1)

    input_json = sys.argv[1]
    if len(sys.argv) > 2:
        output_yml = sys.argv[2]
    else:
        output_yml = str(Path(input_json).with_suffix(".yml"))

    # generate_yml expects the parsed product list, not the path to the file.
    try:
        with open(input_json, encoding="utf-8") as source:
            products = json.load(source)
    except (OSError, ValueError) as error:
        # ValueError covers both malformed JSON and undecodable bytes.
        print(f"Error reading products from {input_json}: {error}")
        sys.exit(1)

    if not isinstance(products, list):
        print(f"Error: {input_json} must contain a JSON list of products")
        sys.exit(1)

    generator = RobotVacuumYMLGenerator()

    # generate_yml refuses to run without at least one category, so register
    # every distinct local category referenced by the products.
    # NOTE(review): the product fields used in this file carry no category
    # name, so a placeholder name is generated - confirm the real names.
    category_ids = dict.fromkeys(
        str(product["local_category_id"])
        for product in products
        if isinstance(product, dict) and "local_category_id" in product
    )
    for category_id in category_ids:
        generator.add_category(category_id, f"Category {category_id}")

    if not generator.generate_yml(products, output_yml):
        sys.exit(1)


if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user