Skip to content
Snippets Groups Projects
Commit 23ff6239 authored by Anton Sarukhanov
Browse files

WIP: Add category support.

parent 57021eca
No related branches found
No related tags found
No related merge requests found
Pipeline #30 failed with stage
in 29 seconds
*.py[cod]
/venv
awesome-slugify==1.6.5
pep8==1.7.0
......@@ -15,8 +15,14 @@ import re
import json
import datetime
from xml.etree import ElementTree
from slugify import UniqueSlugify
WOO_IMP_EXP_FIELDS = [
# Separator between the levels of a nested category name.
ECWID_CATEGORY_DELIMITER = ' / '
# Field delimiter passed to csv.DictReader for the input file.
ECWID_CSV_DELIMITER = ';'
# Matches runs of non-word characters and hyphens.  Written as a raw string
# so that ``\W`` reaches the regex engine instead of being treated as an
# (invalid) string escape sequence.
WP_SLUG_REGEX = re.compile(r'[\W-]+')
WOO_IMP_EXP_PRODUCT_FIELDS = [
# ("FieldName", "Default Value"),
("Id", ""),
("Product Name", ""),
......@@ -79,31 +85,46 @@ WOO_IMP_EXP_FIELDS = [
("Comment Status", "closed"),
("Ping Status", "open")
]
# Ordered (column name, default value) pairs for the category CSV.
# The first element of each pair is used as a csv.DictWriter field name.
WOO_IMP_EXP_CATEGORY_FIELDS = [
    ("Id", ""),
    ("Name", ""),
    ("Slug", ""),
    ("Term Taxonomy Id", ""),
    ("Taxonomy", "product_cat"),
    ("Parent Id", ""),
    ("Parent Slug", ""),
    ("Description", ""),
    ("Term Group", 0),  # the only non-string default
    ("Count", ""),
    ("Category Image", ""),
    ("Woocommerce Term Meta", ""),
]
def strip_html(html_string):
    """Return the text of an HTML fragment with every tag removed.

    The fragment is wrapped in a ``<body>`` element and parsed with
    ElementTree, so markup that is not well-formed XML raises
    ``ElementTree.ParseError``.
    """
    # TODO: Don't strip everything; only inline styles should be removed.
    wrapped = "<body>{0}</body>".format(html_string)
    root = ElementTree.fromstring(wrapped)
    text_fragments = root.itertext()
    return ''.join(text_fragments)
def ecwid_parser(r, r_id=None):
alphanum = re.compile('[\W_]+')
try:
r["description"] = strip_html(r["description"])
except ElementTree.ParseError:
print("Malformed HTML: "
print("Malformed HTML could not be stripped: "
"{0} ({1})".format(r["name"], (r["sku"])))
woo_row = dict(WOO_IMP_EXP_FIELDS)
woo_row.update({
raw_category_names = list(filter(None, [r["category{0}".format(c)]
if r["category{0}".format(c)] else None
for c in range(1, 3)]))
product = dict(WOO_IMP_EXP_PRODUCT_FIELDS)
product.update({
"Id": r["product_id"],
"Product Name": r["name"],
"Categories": json.dumps(list(filter(None, [
{"name": r["category{0}".format(c)],
"slug": alphanum.sub('', r["category{0}".format(c)]).lower()}
if r["category{0}".format(c)] else None for c in range(1, 3)]))),
"Categories": raw_category_names,
"SKU": r["sku"],
"Weight": r["weight"],
"Weight": r["weight"] if float(r["weight"]) > 0 else '',
"Product Images": r["image"],
"Product Image Set": "yes" if r["image"] else "no",
"Short Description": r["seo_description"],
......@@ -113,35 +134,91 @@ def ecwid_parser(r, r_id=None):
"Visibility": "visible" if r["enabled"] == "yes" else "invisible",
"Purchaseable": r["enabled"] or "no",
})
return woo_row
return product
def expand_category(name, cat_id, slugify=None, delimiter=None, products=None):
    """Build a rich category entry from minimal input.

    Args:
        name: Full raw category name; may contain ``delimiter`` between
            nesting levels.
        cat_id: Value stored as both ``Id`` and ``Term Taxonomy Id``.
        slugify: Callable turning ``name`` into a slug.  A fresh
            ``UniqueSlugify`` instance is created when omitted.
        delimiter: Separator between nesting levels.  When falsy, or absent
            from ``name``, the category is treated as top-level.
        products: Optional iterable of product dicts whose ``'Categories'``
            value supports ``in``; used to compute ``Count``.

    Returns:
        A dict with the keys ``Id``, ``Term Taxonomy Id``, ``Name``,
        ``Slug``, ``Parent Raw Name`` and ``Count``.
    """
    slugify = slugify or UniqueSlugify()

    if delimiter and delimiter in name:
        # Split on the LAST delimiter.  rpartition drops the delimiter from
        # both halves, so the leaf name no longer starts with it.
        parent_raw_name, _, leaf_name = name.rpartition(delimiter)
    else:
        parent_raw_name, leaf_name = '', name

    return {
        'Id': cat_id,
        'Term Taxonomy Id': cat_id,
        'Name': leaf_name,
        # The slug is built from the full raw name, not just the leaf.
        'Slug': slugify(name),
        'Parent Raw Name': parent_raw_name,
        'Count': (sum(1 for p in products if name in p['Categories'])
                  if products else 0),
    }
def make_woocommerce_csv(input_file, output_file):
def add_missing_parents(categories):
    """Find categories whose parent is absent from ``categories``.

    The intent is to add such parents (recursively) to the mapping, but
    that step is not implemented yet, so the mapping is returned unchanged.
    """
    for category in categories.values():
        parent_name = category['Parent Raw Name']
        if parent_name and parent_name not in categories:
            # TODO: this
            pass
    return categories
def make_woocommerce_csv(input_file, products_file, categories_file=None):
"""Convert input_file, a CSV of eCommerce products, into
a format that can be imported by WooCommerce."""
count = 0
with open(input_file) as csv_in, open(output_file, "w") as csv_out:
reader = csv.DictReader(csv_in, delimiter=';')
writer = csv.DictWriter(csv_out,
fieldnames=[f for f, d in WOO_IMP_EXP_FIELDS])
writer.writeheader()
for row in reader:
row = ecwid_parser(row)
writer.writerow(row)
count += 1
return count
with open(input_file) as csv_in,\
open(products_file, 'w') as csv_products,\
categories_file and open(categories_file, 'w') as csv_categories:
ecwid_reader = csv.DictReader(csv_in, delimiter=ECWID_CSV_DELIMITER)
products_writer = csv.DictWriter(
csv_products,
fieldnames=[f for f, d in WOO_IMP_EXP_PRODUCT_FIELDS])
products_writer.writeheader()
raw_category_names = []
products = []
for row in ecwid_reader:
product = ecwid_parser(row)
products.append(product)
raw_category_names.extend(product['Categories'])
raw_category_names = list(set(raw_category_names)) # Uniquify
categories = {}
slugify = UniqueSlugify()
for idx, name in enumerate(raw_category_names):
categories[name] = expand_category(name, idx, slugify,
ECWID_CATEGORY_DELIMITER,
products)
categories = add_missing_parents(categories)
for i, c in categories.items():
if c['Parent Raw Name']:
parent = categories[c['Parent Raw Name']]
c['Parent Id'] = parent['Id']
c['Parent Slug'] = parent['Slug']
else:
c['Parent Id'] = ''
c['Parent Slug'] = ''
del c['Parent Raw Name']
for product in products:
products_writer.writerow(product)
if __name__ == '__main__':
try:
output_file = sys.argv[2]
except IndexError:
output_file = 'woocommerce_products.csv'
# Write the category CSV only when a categories output handle is available.
# NOTE(review): the original condition read ``csv_gategories``, an undefined
# name; ``csv_categories`` (the handle passed to DictWriter below) is used
# instead.
if csv_categories:
    categories_writer = csv.DictWriter(
        csv_categories,
        fieldnames=[f for f, d in WOO_IMP_EXP_CATEGORY_FIELDS])
    categories_writer.writeheader()
    for category in categories.values():
        # Start from the declared defaults (e.g. Taxonomy = 'product_cat')
        # so columns the category dict does not set are still populated.
        row = dict(WOO_IMP_EXP_CATEGORY_FIELDS)
        row.update(category)
        categories_writer.writerow(row)
if __name__ == '__main__':
try:
input_file = sys.argv[1]
except IndexError:
print("\n Usage: {0} input_file [output_file]\n".format(sys.argv[0]))
print("\n Usage: {0} input_file\n".format(sys.argv[0]))
sys.exit()
make_woocommerce_csv(input_file, output_file)
make_woocommerce_csv(input_file,
'woocommerce_products.csv',
'woocommerce_categories.csv')
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment