Skip to content
Snippets Groups Projects
Commit 23ff6239 authored by Anton Sarukhanov's avatar Anton Sarukhanov
Browse files

WIP: Add category support.

parent 57021eca
No related branches found
No related tags found
No related merge requests found
Pipeline #30 failed with stage
in 29 seconds
*.py[cod]
/venv
awesome-slugify==1.6.5
pep8==1.7.0 pep8==1.7.0
...@@ -15,8 +15,14 @@ import re ...@@ -15,8 +15,14 @@ import re
import json import json
import datetime import datetime
from xml.etree import ElementTree from xml.etree import ElementTree
from slugify import UniqueSlugify
# Separator between the path components of a raw Ecwid category name;
# passed to expand_category() as its ``delimiter``.
ECWID_CATEGORY_DELIMITER = ' / '
# Field delimiter handed to csv.DictReader when reading the Ecwid export.
ECWID_CSV_DELIMITER = ';'
# Matches runs of non-word characters and hyphens.  Raw string so that
# ``\W`` reaches the regex engine instead of being an invalid str escape.
# NOTE(review): presumably meant for WordPress-style slugs; no use of it
# is visible in this part of the file -- confirm.
WP_SLUG_REGEX = re.compile(r'[\W-]+')
WOO_IMP_EXP_PRODUCT_FIELDS = [
# ("FieldName", "Default Value"), # ("FieldName", "Default Value"),
("Id", ""), ("Id", ""),
("Product Name", ""), ("Product Name", ""),
...@@ -79,31 +85,46 @@ WOO_IMP_EXP_FIELDS = [ ...@@ -79,31 +85,46 @@ WOO_IMP_EXP_FIELDS = [
("Comment Status", "closed"), ("Comment Status", "closed"),
("Ping Status", "open") ("Ping Status", "open")
] ]
# Columns of the category CSV, in output order, each paired with the
# value to use when a category entry does not supply that field.
# make_woocommerce_csv() takes the first element of every pair as the
# DictWriter fieldnames for the categories file.
WOO_IMP_EXP_CATEGORY_FIELDS = [
    # ("FieldName", "Default Value"),
    ("Id", ''),
    ("Name", ''),
    ("Slug", ''),
    ("Term Taxonomy Id", ''),
    ("Taxonomy", 'product_cat'),
    ("Parent Id", ''),
    ("Parent Slug", ''),
    ("Description", ''),
    # The only non-string default in the table.
    ("Term Group", 0),
    ("Count", ''),
    ("Category Image", ''),
    ("Woocommerce Term Meta", ''),
]
def strip_html(html_string):
    """Remove all HTML tags from a string.

    The fragment is wrapped in a synthetic ``<body>`` element, parsed with
    ElementTree, and the text of every node is concatenated in document
    order.

    Args:
        html_string: Markup fragment to strip.

    Returns:
        The text content of the fragment with every tag removed.

    Raises:
        xml.etree.ElementTree.ParseError: If the wrapped fragment is not
            well-formed XML (for example an unclosed tag).
    """
    # TODO: Don't actually strip everything; just remove any inline styles.
    wrapped = "<body>{0}</body>".format(html_string)
    return ''.join(ElementTree.fromstring(wrapped).itertext())
def ecwid_parser(r, r_id=None): def ecwid_parser(r, r_id=None):
alphanum = re.compile('[\W_]+')
try: try:
r["description"] = strip_html(r["description"]) r["description"] = strip_html(r["description"])
except ElementTree.ParseError: except ElementTree.ParseError:
print("Malformed HTML: " print("Malformed HTML could not be stripped: "
"{0} ({1})".format(r["name"], (r["sku"]))) "{0} ({1})".format(r["name"], (r["sku"])))
woo_row = dict(WOO_IMP_EXP_FIELDS) raw_category_names = list(filter(None, [r["category{0}".format(c)]
woo_row.update({ if r["category{0}".format(c)] else None
for c in range(1, 3)]))
product = dict(WOO_IMP_EXP_PRODUCT_FIELDS)
product.update({
"Id": r["product_id"], "Id": r["product_id"],
"Product Name": r["name"], "Product Name": r["name"],
"Categories": json.dumps(list(filter(None, [ "Categories": raw_category_names,
{"name": r["category{0}".format(c)],
"slug": alphanum.sub('', r["category{0}".format(c)]).lower()}
if r["category{0}".format(c)] else None for c in range(1, 3)]))),
"SKU": r["sku"], "SKU": r["sku"],
"Weight": r["weight"], "Weight": r["weight"] if float(r["weight"]) > 0 else '',
"Product Images": r["image"], "Product Images": r["image"],
"Product Image Set": "yes" if r["image"] else "no", "Product Image Set": "yes" if r["image"] else "no",
"Short Description": r["seo_description"], "Short Description": r["seo_description"],
...@@ -113,35 +134,91 @@ def ecwid_parser(r, r_id=None): ...@@ -113,35 +134,91 @@ def ecwid_parser(r, r_id=None):
"Visibility": "visible" if r["enabled"] == "yes" else "invisible", "Visibility": "visible" if r["enabled"] == "yes" else "invisible",
"Purchaseable": r["enabled"] or "no", "Purchaseable": r["enabled"] or "no",
}) })
return woo_row return product
def expand_category(name, cat_id, slugify=None, delimiter=None, products=None):
    """Build a rich category entry from minimal input.

    Args:
        name: Raw category name.  When ``delimiter`` occurs in it, the text
            after the last delimiter is the category's own name and the
            text before it is the raw name of its parent.
        cat_id: Value stored as both ``Id`` and ``Term Taxonomy Id``.
        slugify: Callable turning ``name`` into a slug.  A fresh
            ``UniqueSlugify()`` is created when none is given.
        delimiter: Separator between path components.  When falsy, or not
            present in ``name``, the category is treated as having no
            parent.
        products: Optional iterable of product dicts; each product whose
            ``'Categories'`` value contains ``name`` adds one to ``Count``.

    Returns:
        A dict with the keys ``Id``, ``Term Taxonomy Id``, ``Name``,
        ``Slug``, ``Parent Raw Name`` (``''`` for a top-level category)
        and ``Count``.
    """
    slugify = slugify or UniqueSlugify()

    leaf_name, parent_name = name, ''
    if delimiter and delimiter in name:
        # rpartition drops the delimiter itself; slicing from rfind() kept
        # it glued to the front of the leaf name (' / Cars').
        parent_name, _, leaf_name = name.rpartition(delimiter)

    if products:
        count = sum(1 for product in products if name in product['Categories'])
    else:
        count = 0

    return {
        'Id': cat_id,
        'Term Taxonomy Id': cat_id,
        'Name': leaf_name,
        # The slug is derived from the full raw name, not just the leaf.
        'Slug': slugify(name),
        'Parent Raw Name': parent_name,
        'Count': count,
    }
def add_missing_parents(categories):
    """Find categories whose parents do not exist, and recursively add
    those parents to the list.

    NOTE: still a stub.  Categories with a missing parent are detected but
    nothing is added yet, so the mapping is returned unchanged and callers
    must cope with a ``'Parent Raw Name'`` that is not a key of the result.

    Args:
        categories: Dict mapping a raw category name to its category entry;
            every entry must have a ``'Parent Raw Name'`` key.

    Returns:
        The same ``categories`` dict that was passed in.
    """
    for category in categories.values():
        parent_name = category['Parent Raw Name']
        if not parent_name or parent_name in categories:
            # Top-level category, or its parent is already present.
            continue
        # TODO: create an entry for parent_name (and, recursively, for its
        # own missing ancestors).  Collect the new entries separately --
        # the dict must not grow while it is being iterated.
    return categories
def make_woocommerce_csv(input_file, products_file, categories_file=None):
    """Convert input_file, a CSV of eCommerce products, into
    a format that can be imported by WooCommerce.

    Args:
        input_file: Path of the Ecwid export, read with
            ``ECWID_CSV_DELIMITER`` as the field delimiter.
        products_file: Path the product CSV is written to.
        categories_file: Optional path for the category CSV.  When falsy,
            no category file is written.

    Returns:
        The number of product rows written.
    """
    # The csv module wants files opened with newline='' so that it can do
    # its own line-ending handling.
    with open(input_file, newline='') as csv_in:
        ecwid_reader = csv.DictReader(csv_in, delimiter=ECWID_CSV_DELIMITER)
        products = [ecwid_parser(row) for row in ecwid_reader]

    # Sorted rather than taken in set order, so that the ids assigned
    # below are the same on every run.
    raw_category_names = sorted(
        {name for product in products for name in product['Categories']})

    slugify = UniqueSlugify()
    categories = {}
    for idx, name in enumerate(raw_category_names):
        categories[name] = expand_category(name, idx, slugify,
                                           ECWID_CATEGORY_DELIMITER,
                                           products)
    categories = add_missing_parents(categories)

    # Resolve each raw parent name to the parent's id and slug.  A parent
    # that is not in the mapping (add_missing_parents may not have created
    # it) leaves the category top-level instead of raising KeyError.
    for category in categories.values():
        parent = categories.get(category.pop('Parent Raw Name', ''))
        category['Parent Id'] = parent['Id'] if parent else ''
        category['Parent Slug'] = parent['Slug'] if parent else ''

    with open(products_file, 'w', newline='') as csv_products:
        products_writer = csv.DictWriter(
            csv_products,
            fieldnames=[f for f, d in WOO_IMP_EXP_PRODUCT_FIELDS])
        products_writer.writeheader()
        for product in products:
            # Serialise the category list as JSON name/slug pairs (the
            # shape this column had before category support) instead of
            # letting csv write the repr of a Python list.
            row = dict(product)
            row['Categories'] = json.dumps(
                [{"name": name, "slug": categories[name]['Slug']}
                 for name in product['Categories']])
            products_writer.writerow(row)

    if categories_file:
        with open(categories_file, 'w', newline='') as csv_categories:
            categories_writer = csv.DictWriter(
                csv_categories,
                fieldnames=[f for f, d in WOO_IMP_EXP_CATEGORY_FIELDS])
            categories_writer.writeheader()
            for category in categories.values():
                # Start from the column defaults (e.g. Taxonomy) so that
                # fields the entry lacks are not written as blanks.
                row = dict(WOO_IMP_EXP_CATEGORY_FIELDS)
                row.update(category)
                categories_writer.writerow(row)

    return len(products)
if __name__ == '__main__':
    # Script entry point: the only argument is the path of the Ecwid CSV
    # export; both output files get fixed names in the working directory.
    try:
        input_file = sys.argv[1]
    except IndexError:
        print("\n Usage: {0} input_file\n".format(sys.argv[0]))
        # Exit non-zero so that shells and CI see the missing argument as
        # a failure rather than a successful run.
        sys.exit(1)
    make_woocommerce_csv(input_file,
                         'woocommerce_products.csv',
                         'woocommerce_categories.csv')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment