From 23ff62393079f8e8699cf8dc5b4772e38de6fe68 Mon Sep 17 00:00:00 2001
From: Anton Sarukhanov <code@ant.sr>
Date: Sat, 10 Dec 2016 15:34:14 -0500
Subject: [PATCH] WIP: Add category support.

---
 .gitignore       |   2 +
 requirements.txt |   1 +
 wooify           | 137 ++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 110 insertions(+), 30 deletions(-)
 create mode 100644 .gitignore

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7cdc25a
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+*.py[cod]
+/venv
diff --git a/requirements.txt b/requirements.txt
index 8098e7b..46364b3 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1 +1,2 @@
+awesome-slugify==1.6.5
 pep8==1.7.0
diff --git a/wooify b/wooify
index a81ec92..4ce8da7 100644
--- a/wooify
+++ b/wooify
@@ -15,8 +15,14 @@ import re
 import json
 import datetime
 from xml.etree import ElementTree
+from slugify import UniqueSlugify
 
-WOO_IMP_EXP_FIELDS = [
+ECWID_CATEGORY_DELIMITER = ' / '  # joins the levels of a category path
+ECWID_CSV_DELIMITER = ';'  # field separator of the input CSV
+
+WP_SLUG_REGEX = re.compile(r'[\W-]+')  # raw string: \W is a regex escape
+
+WOO_IMP_EXP_PRODUCT_FIELDS = [
     # ("FieldName", "Default Value"),
     ("Id", ""),
     ("Product Name", ""),
@@ -79,31 +85,46 @@ WOO_IMP_EXP_FIELDS = [
     ("Comment Status", "closed"),
     ("Ping Status", "open")
 ]
+WOO_IMP_EXP_CATEGORY_FIELDS = [
+    # ("FieldName", "Default Value") pairs; order is the CSV column order.
+    ("Id", ""),
+    ("Name", ""),
+    ("Slug", ""),
+    ("Term Taxonomy Id", ""),
+    ("Taxonomy", "product_cat"),
+    ("Parent Id", ""),
+    ("Parent Slug", ""),
+    ("Description", ""),
+    ("Term Group", 0),
+    ("Count", ""),
+    ("Category Image", ""),
+    ("Woocommerce Term Meta", ""),
+]
 
 
 def strip_html(html_string):
     """Remove all HTML tags from a string."""
+    # TODO: Don't actually strip everything; just remove any inline styles.
     return ''.join(ElementTree.fromstring("<body>{0}</body>"
                                           .format(html_string)).itertext())
 
 
 def ecwid_parser(r, r_id=None):
-    alphanum = re.compile('[\W_]+')
     try:
         r["description"] = strip_html(r["description"])
     except ElementTree.ParseError:
-        print("Malformed HTML: "
+        print("Malformed HTML could not be stripped: "
               "{0} ({1})".format(r["name"], (r["sku"])))
-    woo_row = dict(WOO_IMP_EXP_FIELDS)
-    woo_row.update({
+    raw_category_names = list(filter(None, [r["category{0}".format(c)]
+                                     if r["category{0}".format(c)] else None
+                                     for c in range(1, 3)]))
+    product = dict(WOO_IMP_EXP_PRODUCT_FIELDS)
+    product.update({
         "Id": r["product_id"],
         "Product Name": r["name"],
-        "Categories": json.dumps(list(filter(None, [
-            {"name": r["category{0}".format(c)],
-             "slug": alphanum.sub('', r["category{0}".format(c)]).lower()}
-            if r["category{0}".format(c)] else None for c in range(1, 3)]))),
+        "Categories": raw_category_names,
         "SKU": r["sku"],
-        "Weight": r["weight"],
+        "Weight": r["weight"] if float(r["weight"]) > 0 else '',
         "Product Images": r["image"],
         "Product Image Set": "yes" if r["image"] else "no",
         "Short Description": r["seo_description"],
@@ -113,35 +134,91 @@ def ecwid_parser(r, r_id=None):
         "Visibility": "visible" if r["enabled"] == "yes" else "invisible",
         "Purchaseable": r["enabled"] or "no",
     })
-    return woo_row
+    return product
+
+
+def expand_category(name, cat_id, slugify=None, delimiter=None, products=None):
+    """Build a category row from a raw, optionally `delimiter`-joined, name.
+
+    'Name' is the last path segment, 'Parent Raw Name' everything before it
+    ('' if there is none); 'Count' is how many products list `name`."""
+    slugify = slugify or UniqueSlugify()
+    path = name.rpartition(delimiter) if delimiter else ('', '', name)
+    return {
+        'Id': cat_id,
+        'Term Taxonomy Id': cat_id,
+        'Name': path[2],  # last segment, delimiter excluded
+        'Slug': slugify(name),
+        'Parent Raw Name': path[0],
+        'Count': sum(1 for p in (products or []) if name in p['Categories']),
+    }
 
 
-def make_woocommerce_csv(input_file, output_file):
+def add_missing_parents(categories):
+    """Meant to add every parent category that is named by a
+       'Parent Raw Name' but is not itself a key of `categories`.
+       Not implemented yet: `categories` is returned unchanged."""
+    for entry in categories.values():
+        parent_name = entry['Parent Raw Name']
+        if parent_name and parent_name not in categories:
+            # TODO: create the missing parent entry (and its own parents).
+            pass
+    return categories
+
+
+def make_woocommerce_csv(input_file, products_file, categories_file=None):
     """Convert input_file, a CSV of eCommerce products, into
        a format that can be imported by WooCommerce."""
-    count = 0
-    with open(input_file) as csv_in, open(output_file, "w") as csv_out:
-        reader = csv.DictReader(csv_in, delimiter=';')
-        writer = csv.DictWriter(csv_out,
-                                fieldnames=[f for f, d in WOO_IMP_EXP_FIELDS])
-        writer.writeheader()
-        for row in reader:
-            row = ecwid_parser(row)
-            writer.writerow(row)
-            count += 1
-    return count
+    # Read everything first: category ids and counts need all products.
+    with open(input_file) as csv_in:
+        ecwid_reader = csv.DictReader(csv_in, delimiter=ECWID_CSV_DELIMITER)
+        products = [ecwid_parser(row) for row in ecwid_reader]
+    raw_category_names = set()
+    for product in products:
+        for name in product['Categories']:
+            # Register each ancestor path too, so every parent looked up
+            # below exists even when no product is filed directly under it.
+            while name and name not in raw_category_names:
+                raw_category_names.add(name)
+                name = name.rpartition(ECWID_CATEGORY_DELIMITER)[0]
+    categories = {}
+    slugify = UniqueSlugify()
+    # Sorted, so that ids and unique slugs are the same on every run.
+    for idx, name in enumerate(sorted(raw_category_names)):
+        categories[name] = expand_category(
+            name, idx, slugify, ECWID_CATEGORY_DELIMITER, products)
+    categories = add_missing_parents(categories)
+    for c in categories.values():
+        parent = categories.get(c.pop('Parent Raw Name'))
+        c['Parent Id'] = parent['Id'] if parent else ''
+        c['Parent Slug'] = parent['Slug'] if parent else ''
 
+    with open(products_file, 'w') as csv_products:
+        products_writer = csv.DictWriter(
+            csv_products,
+            fieldnames=[f for f, d in WOO_IMP_EXP_PRODUCT_FIELDS])
+        products_writer.writeheader()
+        products_writer.writerows(products)
 
-if __name__ == '__main__':
-    try:
-        output_file = sys.argv[2]
-    except IndexError:
-        output_file = 'woocommerce_products.csv'
+    if categories_file:
+        with open(categories_file, 'w') as csv_categories:
+            categories_writer = csv.DictWriter(
+                csv_categories,
+                fieldnames=[f for f, d in WOO_IMP_EXP_CATEGORY_FIELDS])
+            categories_writer.writeheader()
+            for category in categories.values():
+                # Overlay the row on the column defaults (e.g. 'Taxonomy').
+                row = dict(WOO_IMP_EXP_CATEGORY_FIELDS)
+                row.update(category)
+                categories_writer.writerow(row)
 
+if __name__ == '__main__':
     try:
         input_file = sys.argv[1]
     except IndexError:
-        print("\n  Usage: {0} input_file [output_file]\n".format(sys.argv[0]))
+        print("\n  Usage: {0} input_file\n".format(sys.argv[0]))
         sys.exit()
 
-    make_woocommerce_csv(input_file, output_file)
+    make_woocommerce_csv(input_file,  # output file names are fixed
+                         'woocommerce_products.csv',
+                         'woocommerce_categories.csv')
-- 
GitLab