diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000000000000000000000000000000000..7cdc25a55df45779ef107e690287a29120fc8cd1 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +*.py[cod] +/venv diff --git a/requirements.txt b/requirements.txt index 8098e7b9de75342569d2232691a3cd9e11ab735a..46364b31029b2ee8b75fa852a574c379cf0d7a3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,2 @@ +awesome-slugify==1.6.5 pep8==1.7.0 diff --git a/wooify b/wooify index a81ec92e486dad599b548a76f68feff6e4e38b86..4ce8da752fea5fab21d1ffe03ba08060b454434d 100644 --- a/wooify +++ b/wooify @@ -15,8 +15,14 @@ import re import json import datetime from xml.etree import ElementTree +from slugify import UniqueSlugify -WOO_IMP_EXP_FIELDS = [ +ECWID_CATEGORY_DELIMITER = ' / ' +ECWID_CSV_DELIMITER = ';' + +WP_SLUG_REGEX = re.compile('[\W-]+') + +WOO_IMP_EXP_PRODUCT_FIELDS = [ # ("FieldName", "Default Value"), ("Id", ""), ("Product Name", ""), @@ -79,31 +85,46 @@ WOO_IMP_EXP_FIELDS = [ ("Comment Status", "closed"), ("Ping Status", "open") ] +WOO_IMP_EXP_CATEGORY_FIELDS = [ + # ("FieldName", "Default Value"), + ("Id", ''), + ("Name", ''), + ("Slug", ''), + ("Term Taxonomy Id", ''), + ("Taxonomy", 'product_cat'), + ("Parent Id", ''), + ("Parent Slug", ''), + ("Description", ''), + ("Term Group", 0), + ("Count", ''), + ("Category Image", ''), + ("Woocommerce Term Meta", ''), +] def strip_html(html_string): """Remove all HTML tags from a string.""" + # TODO: Dont actually strip all.. Just remove any inline styles. return ''.join(ElementTree.fromstring("<body>{0}</body>" .format(html_string)).itertext()) def ecwid_parser(r, r_id=None): - alphanum = re.compile('[\W_]+') try: r["description"] = strip_html(r["description"]) except ElementTree.ParseError: - print("Malformed HTML: " + print("Malformed HTML could not be stripped: " "{0} ({1})".format(r["name"], (r["sku"]))) - woo_row = dict(WOO_IMP_EXP_FIELDS) - woo_row.update({ + raw_category_names = list(filter(None, [r["category{0}".format(c)] + if r["category{0}".format(c)] else None + for c in range(1, 3)])) + product = dict(WOO_IMP_EXP_PRODUCT_FIELDS) + product.update({ "Id": r["product_id"], "Product Name": r["name"], - "Categories": json.dumps(list(filter(None, [ - {"name": r["category{0}".format(c)], - "slug": alphanum.sub('', r["category{0}".format(c)]).lower()} - if r["category{0}".format(c)] else None for c in range(1, 3)]))), + "Categories": raw_category_names, "SKU": r["sku"], - "Weight": r["weight"], + "Weight": r["weight"] if float(r["weight"]) > 0 else '', "Product Images": r["image"], "Product Image Set": "yes" if r["image"] else "no", "Short Description": r["seo_description"], @@ -113,35 +134,91 @@ def ecwid_parser(r, r_id=None): "Visibility": "visible" if r["enabled"] == "yes" else "invisible", "Purchaseable": r["enabled"] or "no", }) - return woo_row + return product + + +def expand_category(name, cat_id, slugify=None, delimiter=None, products=None): + """Build a rich category entry from minimal input.""" + slugify = slugify or UniqueSlugify() + return { + 'Id': cat_id, + 'Term Taxonomy Id': cat_id, + 'Name': (name[name.rfind(delimiter):] + if delimiter and delimiter in name else name), + 'Slug': slugify(name), + 'Parent Raw Name': (name[:name.rfind(delimiter)] + if delimiter and delimiter in name + else ''), + 'Count': (sum([1 for p in products if name in p['Categories']]) + if products else 0), + } -def make_woocommerce_csv(input_file, output_file): +def add_missing_parents(categories): + """Find categories whose parents do not exist, and recursively add + those parents to the list.""" + for i, c in categories.items(): + if not c['Parent Raw Name']: + continue + if c['Parent Raw Name'] in categories: + continue + # TODO: this + return categories + + +def make_woocommerce_csv(input_file, products_file, categories_file=None): """Convert input_file, a CSV of eCommerce products, into a format that can be imported by WooCommerce.""" - count = 0 - with open(input_file) as csv_in, open(output_file, "w") as csv_out: - reader = csv.DictReader(csv_in, delimiter=';') - writer = csv.DictWriter(csv_out, - fieldnames=[f for f, d in WOO_IMP_EXP_FIELDS]) - writer.writeheader() - for row in reader: - row = ecwid_parser(row) - writer.writerow(row) - count += 1 - return count + with open(input_file) as csv_in,\ + open(products_file, 'w') as csv_products,\ + categories_file and open(categories_file, 'w') as csv_categories: + ecwid_reader = csv.DictReader(csv_in, delimiter=ECWID_CSV_DELIMITER) + products_writer = csv.DictWriter( + csv_products, + fieldnames=[f for f, d in WOO_IMP_EXP_PRODUCT_FIELDS]) + products_writer.writeheader() + raw_category_names = [] + products = [] + for row in ecwid_reader: + product = ecwid_parser(row) + products.append(product) + raw_category_names.extend(product['Categories']) + raw_category_names = list(set(raw_category_names)) # Uniquify + categories = {} + slugify = UniqueSlugify() + for idx, name in enumerate(raw_category_names): + categories[name] = expand_category(name, idx, slugify, + ECWID_CATEGORY_DELIMITER, + products) + categories = add_missing_parents(categories) + for i, c in categories.items(): + if c['Parent Raw Name']: + parent = categories[c['Parent Raw Name']] + c['Parent Id'] = parent['Id'] + c['Parent Slug'] = parent['Slug'] + else: + c['Parent Id'] = '' + c['Parent Slug'] = '' + del c['Parent Raw Name'] + for product in products: + products_writer.writerow(product) -if __name__ == '__main__': - try: - output_file = sys.argv[2] - except IndexError: - output_file = 'woocommerce_products.csv' + if csv_gategories: + categories_writer = csv.DictWriter( + csv_categories, + fieldnames=[f for f, d in WOO_IMP_EXP_CATEGORY_FIELDS]) + categories_writer.writeheader() + for category in list(categories.values()): + categories_writer.writerow(category) +if __name__ == '__main__': try: input_file = sys.argv[1] except IndexError: - print("\n Usage: {0} input_file [output_file]\n".format(sys.argv[0])) + print("\n Usage: {0} input_file\n".format(sys.argv[0])) sys.exit() - make_woocommerce_csv(input_file, output_file) + make_woocommerce_csv(input_file, + 'woocommerce_products.csv', + 'woocommerce_categories.csv')