From ed4197e09245b5d25c0c3b7c790f40ccc353e270 Mon Sep 17 00:00:00 2001
From: Anton Sarukhanov <code@ant.sr>
Date: Tue, 3 Dec 2019 09:49:32 -0500
Subject: [PATCH] Cleanup & refactoring.

---
 .flake8              |   2 +
 .flaskenv            |   1 +
 .gitignore           |   1 +
 .gitlab-ci.yml       |  12 +++++
 README.md            |   3 +-
 app.py               |  40 ++++++++++-----
 models.py            | 113 +++++++++++++++++++++++--------------------
 mypy.ini             |   7 +++
 requirements-dev.txt |   7 +++
 requirements.txt     |   8 +--
 scraper.py           |  94 ++++++++++++++++++++---------------
 static/css/style.css |  20 +++++---
 templates/live.html  |  29 ++++++-----
 13 files changed, 207 insertions(+), 130 deletions(-)
 create mode 100644 .flake8
 create mode 100644 .flaskenv
 create mode 100644 .gitlab-ci.yml
 create mode 100644 mypy.ini
 create mode 100644 requirements-dev.txt

diff --git a/.flake8 b/.flake8
new file mode 100644
index 0000000..2bcd70e
--- /dev/null
+++ b/.flake8
@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 88
diff --git a/.flaskenv b/.flaskenv
new file mode 100644
index 0000000..95aa9fa
--- /dev/null
+++ b/.flaskenv
@@ -0,0 +1 @@
+FLASK_APP=app.py
diff --git a/.gitignore b/.gitignore
index 7f93ebf..57e5c62 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
 venv
 __pycache__
+.mypy_cache
diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
new file mode 100644
index 0000000..01f77c0
--- /dev/null
+++ b/.gitlab-ci.yml
@@ -0,0 +1,12 @@
+image: python:3-buster
+
+before_script:
+  - pip install -r requirements-dev.txt
+
+test:
+    script:
+    - bandit *.py
+    - flake8 *.py
+    - mypy *.py
+    - pydocstyle *.py
+    - pylint *.py
diff --git a/README.md b/README.md
index c8fb612..1385f3c 100644
--- a/README.md
+++ b/README.md
@@ -10,10 +10,11 @@ This is a quick weekend hack, very much not "production-ready". Use at your own
 
 ## Setup
 
+* Install the `libxml2-dev` and `libxslt1-dev` libraries.
 * Create a Python 3 [virtualenv](https://virtualenv.pypa.io/en/stable/)
 * `pip install -r requirements.txt` to install Python dependencies
 
 ## Run
 
-* `FLASK_APP=app.py flask run -h 0.0.0.0`
+* `flask run`
 * Visit [localhost:5000](http://127.0.0.1:5000) in a browser.
diff --git a/app.py b/app.py
index 8abfdc8..caa2155 100644
--- a/app.py
+++ b/app.py
@@ -1,40 +1,58 @@
+"""Main entrypoint for the Flask application."""
+
 from flask import Flask, render_template, redirect, request, url_for
 from flask_caching import Cache
-from scraper import Scraper
+from scraper import FTPScraper
+import models
 
+# pylint: disable=invalid-name
 app = Flask(__name__)
 cache = Cache(app, config={'CACHE_TYPE': 'simple'})
 
-
-def make_cache_key(*args, **kwargs):
-    path = request.path
-    args = str(hash(frozenset(request.args.items())))
-    return (path + args).encode('utf-8')
+DISPLAY_DATETIME_FORMAT = '%A, %B %d, %Y at %-I:%M %p'
+DISPLAY_DATE_FORMAT = '%A, %B %-d, %Y'
+DISPLAY_TIME_FORMAT = '%-I:%M %p on %A'
 
 
 @app.after_request
 def add_header(response):
+    """Add an HTTP response header for cache invalidation."""
     response.cache_control.max_age = 300
     return response
 
 
+@app.template_filter('strftime')
+def _jinja2_filter_datetime(datetime, date=True, time=True):
+    return datetime.strftime(DISPLAY_DATETIME_FORMAT if date and time
+                             else DISPLAY_DATE_FORMAT if date
+                             else DISPLAY_TIME_FORMAT if time else '')
+
+
 @app.route("/")
 def index():
+    """Render the app landing page."""
     return render_template('index.html')
 
 
+def _make_cache_key():
+    """Create a cache key for Flask-Caching."""
+    path = request.path
+    args = str(hash(frozenset(request.args.items())))
+    return (path + args).encode('utf-8')
+
+
 @app.route("/live")
-@cache.cached(timeout=300, key_prefix=make_cache_key)
+@cache.cached(timeout=300, key_prefix=_make_cache_key)
 def live(results_url=None):
+    """Render the primary view of live tournament stats."""
     results_url = request.args.get('results_url')
     if not results_url:
         return redirect(url_for('index'))
 
-    scraper = Scraper(results_url)
+    scraper = FTPScraper(results_url)
     tournament = scraper.scrape()
-    return render_template('live.html',
-                           tournament=tournament,
-                           events=tournament.events)
+    return render_template('live.html', tournament=tournament, events=tournament.events,
+                           phases=models.EventPhase)
 
 
 if __name__ == "__main__":
diff --git a/models.py b/models.py
index 9cd181c..cb7a4a2 100644
--- a/models.py
+++ b/models.py
@@ -1,85 +1,92 @@
 """Domain-specific class definitions."""
 
+from collections import namedtuple
+from dataclasses import dataclass, field
+from datetime import datetime
+from typing import List
+from enum import Enum
 
+Fencer = namedtuple('Fencer', 'name is_checked_in')
+setattr(Fencer, '__eq__', lambda f1, f2: f1.name == f2.name)
+
+
+@dataclass
 class Tournament:
-    def __init__(self, name, url, updated='', events=None):
-        self.name = name
-        self.url = url
-        self.updated = updated
-        self.events = events or []
+    """A fencing competition with live results powered by Fencing Time."""
 
-    def add_event(self, event):
-        self.events.append(event)
-        event.tournament = self
+    name: str
+    url: str
+    updated: datetime
+    events: List['Event'] = field(default_factory=list)
 
-    def count_all_fencers(self):
+    def count_fencers(self):
+        """Count the fencers in all events."""
         for event in self.events:
             event.count_fencers()
 
 
-class EventStatus:
-    def __init__(self, name):
-        self.name = name
+class EventPhase(Enum):
+    """The current state of an Event."""
 
-    def __repr__(self):
-        return self.name
+    def __str__(self):
+        """Provide the readable name when used as a string."""
+        return str(self.name)
+    REGISTRATION = "Registration"
+    STARTED = "Started"
+    FINISHED = "Finished"
+
+
+@dataclass
+class EventStatistics:
+    """Fencer counts for an event."""
+
+    fencers_checked_in: List[Fencer] = field(default_factory=list)
+    new_fencers_not_checked_in: List[Fencer] = field(default_factory=list)
+    previously_fenced: dict = field(default_factory=dict)
+    previous_total: int = 0
 
 
+@dataclass
 class Event:
-    STATUS_REGISTRATION = EventStatus("Registration")
-    STATUS_STARTED = EventStatus("Started")
-    STATUS_FINISHED = EventStatus("Finished")
-
-    def __init__(self, name, time, status, url, fencers, tournament=None):
-        self.name = name
-        self.time = time
-        self.status = status
-        self.url = url
-        self.fencers = fencers
-        self.tournament = tournament
+    """A single event in a tournament (e.g. Y12 Mens Foil)."""
+
+    name: str
+    url: str
+    time: datetime
+    tournament: Tournament
+    phase: EventPhase
+    stats: EventStatistics = field(default_factory=EventStatistics)
+    fencers: List[Fencer] = field(default_factory=list)
 
     def __repr__(self):
+        """Use event name as the canonical representation."""
         return self.name
 
     def count_fencers(self):
         """Count the fencers in an event.
-           Result is divided by status (checked in, not checked in, etc.)"""
-        self.fencers_checked_in = []
-        self.new_fencers_not_checked_in = []
-        self.previously_fenced = {}
-        self.previous_total = 0
-        if self.name == 'Y-12 Men\'s Foil':
-            print(self.fencers)
+
+        Result is divided by status (checked in, not checked in, etc.)
+        """
+        self.stats.fencers_checked_in = []
+        self.stats.new_fencers_not_checked_in = []
+        self.stats.previously_fenced = {}
+        self.stats.previous_total = 0
         for fencer in self.fencers:
             if fencer.is_checked_in:
-                self.fencers_checked_in.append(fencer)
+                self.stats.fencers_checked_in.append(fencer)
             else:
-                self.new_fencers_not_checked_in.append(fencer)
+                self.stats.new_fencers_not_checked_in.append(fencer)
             for event in self.tournament.events:
                 if event.name == self.name:
                     break
                 if fencer in event.fencers:
-                    if event.name in self.previously_fenced:
-                        self.previously_fenced[event.name] += 1
+                    if event.name in self.stats.previously_fenced:
+                        self.stats.previously_fenced[event.name] += 1
                     else:
-                        self.previously_fenced[event.name] = 1
-                    self.previous_total += 1
+                        self.stats.previously_fenced[event.name] = 1
+                    self.stats.previous_total += 1
                     try:
-                        self.new_fencers_not_checked_in.remove(fencer)
+                        self.stats.new_fencers_not_checked_in.remove(fencer)
                     except ValueError:
                         pass  # already removed; ignore
                     break
-
-
-class Fencer:
-
-    def __init__(self, name, is_checked_in):
-        self.name = name.strip()
-        self.is_checked_in = is_checked_in
-
-    def __repr__(self):
-        return '<Fencer name="{}" is_checked_in="{}">'.format(
-            self.name, self.is_checked_in)
-
-    def __eq__(self, other):
-        return self.name == other.name
diff --git a/mypy.ini b/mypy.ini
new file mode 100644
index 0000000..18dc9e9
--- /dev/null
+++ b/mypy.ini
@@ -0,0 +1,7 @@
+[mypy]
+
+# There's no mypy stub for these libs. I want to use them, but don't feel like creating the stubs.
+[mypy-flask_caching.*]
+ignore_missing_imports = True
+[mypy-lxml.*]
+ignore_missing_imports = True
diff --git a/requirements-dev.txt b/requirements-dev.txt
new file mode 100644
index 0000000..d411dbc
--- /dev/null
+++ b/requirements-dev.txt
@@ -0,0 +1,7 @@
+-r requirements.txt
+bandit==1.6.2
+flake8==3.7.9
+mypy==0.750
+pydocstyle==4.0.1
+pylint==2.4.4
+python-dotenv==0.10.3
diff --git a/requirements.txt b/requirements.txt
index 66f8706..54b901f 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,4 +1,4 @@
-lxml==3.6.4
-requests==2.12.1
-Flask==0.11.1
-Flask-Caching==1.3.3
+lxml==4.4.2
+requests==2.22.0
+Flask==1.1.1
+Flask-Caching==1.8.0
diff --git a/scraper.py b/scraper.py
index 653d982..05a4475 100644
--- a/scraper.py
+++ b/scraper.py
@@ -1,34 +1,46 @@
-from lxml import html
-import requests
+"""Extract fencing event registration stats from Fencing Time webpages."""
+
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
+from datetime import datetime
 from urllib.parse import urlparse, urljoin
+from lxml import html  # nosec Bandit suggests defusedxml but defusedxml.lxml is dead
+import requests
+from models import Event, EventPhase, Fencer, Tournament
 
-from models import Event, Fencer, Tournament
 
+class FTPScraper:
+    """Scraper for tournaments hosted on an FTP server.
+
+    This reads the original Fencing Time results pages, hosted by individual
+    tournament operators.
+    """
+
+    UPDATED_DATETIME_FORMAT = '%m/%d/%Y - %I:%M %p'
+    EVENT_DATETIME_FORMAT = '%A, %B %d, %Y - %I:%M %p'
 
-class Scraper:
     def __init__(self, tournament_url):
+        """Set up the scraper instance."""
         self.tournament_url = tournament_url
+        self.tournament = None
 
     def scrape(self):
-
-        # Get tournament info
+        """Get all tournament information."""
         try:
             results = requests.get(self.tournament_url)
         except requests.exceptions.MissingSchema:
             results = requests.get("http://{}".format(self.tournament_url))
         results_tree = html.fromstring(results.content)
         try:
-            tournament_name = results_tree.xpath(
-                '//span[@class="tournName"]/text()')[0]
-            updated = (results_tree.xpath(
-                '//span[@class="lastUpdate"]/text()')[0]
-                .replace('Last Updated:', '').strip())
+            tournament_name = results_tree.xpath('//span[@class="tournName"]/text()')[0]
+            updated_str = (results_tree.xpath('//span[@class="lastUpdate"]/text()')[0]
+                           .replace('Last Updated:', '').strip())
+            updated = datetime.strptime(updated_str, self.UPDATED_DATETIME_FORMAT)
         except IndexError:
             raise ScrapeError("Tournament info not found.")
 
-        self.tournament = Tournament(tournament_name, results.url, updated)
+        self.tournament = Tournament(name=tournament_name, url=results.url,
+                                     updated=updated)
 
         # Get tournament events
         try:
@@ -38,12 +50,12 @@ class Scraper:
             raise ScrapeError("No event schedule found.")
 
         loop = asyncio.new_event_loop()
-        loop.run_until_complete(self.scrape_events(event_urls))
+        loop.run_until_complete(self._scrape_events(event_urls))
 
         return self.tournament
 
-    async def scrape_events(self, event_urls):
-
+    async def _scrape_events(self, event_urls):
+        """Get event information asynchronously."""
         with ThreadPoolExecutor(max_workers=20) as executor:
             loop = asyncio.get_event_loop()
             futures = []
@@ -51,54 +63,56 @@ class Scraper:
             for event_url in event_urls:
                 if not urlparse(event_url).netloc:
                     event_url = urljoin(self.tournament.url, event_url)
-                futures.append(loop.run_in_executor(
-                    executor,
-                    requests.get,
-                    event_url))
+                futures.append(loop.run_in_executor(executor, requests.get, event_url))
 
             for response in await asyncio.gather(*futures):
                 event = self.parse_event(response)
-                self.tournament.add_event(event)
+                self.tournament.events.append(event)
 
-            self.tournament.count_all_fencers()
+            self.tournament.count_fencers()
 
     def parse_event(self, event):
-        # Get the event details (name, time) as text
+        """Extract useful strings from the event info."""
         event_tree = html.fromstring(event.content)
-        event_details = event_tree.xpath(
-            '//span[@class="tournDetails"]/text()')
+        event_details = event_tree.xpath('//span[@class="tournDetails"]/text()')
         try:
             event_name = event_details[0]
-            event_time = event_details[1]
+            event_time_str = event_details[1]
+            event_time = datetime.strptime(event_time_str, self.EVENT_DATETIME_FORMAT)
         except IndexError:
-            raise ScrapeError(
-                "Failed to interpret live results for event \"{}\"."
-                .format(event_details))
+            raise ScrapeError("Failed to interpret live results for event \"{}\"."
+                              .format(event_details))
 
         # Get the event status
         if event_tree.xpath('//a[text()="Final Results"]'):
             fencers = event_tree.xpath(
                 '//div[@id="finalResults"]/table/tr/td[2]/text()')
             fencers = [Fencer(f, True) for f in fencers]
-            event_status = Event.STATUS_FINISHED
+            event_phase = EventPhase.FINISHED
         elif event_tree.xpath('//a[text()="Seeding"]'):
             fencers = event_tree.xpath(
                 '//div[@id="Round1Seeding"]/table/tr/td[2]/text()')
             fencers = [Fencer(f, True) for f in fencers]
-            event_status = Event.STATUS_STARTED
+            event_phase = EventPhase.STARTED
         elif event_tree.xpath('//a[text()="Check-In Status"]'):
-            event_status = Event.STATUS_REGISTRATION
+            event_phase = EventPhase.REGISTRATION
             fencers_checked_in = [
-                True if len(list(f)) else False
-                for f in event_tree.xpath(
-                    '//div[@id="checkIn"]/table/tr/td[1]')]
-            fencers = event_tree.xpath(
-                '//div[@id="checkIn"]/table/tr/td[2]/text()')
-            fencers = [Fencer(f, ci)
-                       for (f, ci) in zip(fencers, fencers_checked_in)]
+                bool(list(f))
+                for f in event_tree.xpath('//div[@id="checkIn"]/table/tr/td[1]')]
+            fencers = event_tree.xpath('//div[@id="checkIn"]/table/tr/td[2]/text()')
+            fencers = [Fencer(f, ci) for (f, ci) in zip(fencers, fencers_checked_in)]
+
+        return Event(name=event_name, time=event_time, phase=event_phase,
+                     url=event.url, fencers=fencers, tournament=self.tournament)
+
+
+class FTLiveScraper(FTPScraper):
+    """Scraper for tournaments hosted on fencingtimelive.com.
 
-        return Event(event_name, event_time, event_status, event.url, fencers)
+    This reads the newer-style pages, centrally hosted by Fencing Time.
+    """
+    # to do...
 
 
 class ScrapeError(Exception):
-    pass
+    """A problem with scraping."""
diff --git a/static/css/style.css b/static/css/style.css
index 06e52db..5c55384 100644
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -14,9 +14,6 @@ div.time {
 div.name {
     font-size: 1.1em;
 }
-div.time {
-    font-size: .80em;
-}
 section {
     border-radius: 1em;
     border: 2px solid #555;
@@ -50,8 +47,8 @@ a:hover {
     font-size: 1.15em;
 }
 
-/* Event statuses */
-.status {
+/* Event phases */
+.phase {
     float: right;
     color: #000;
     border-radius: .25em;
@@ -63,13 +60,13 @@ a:hover {
     vertical-align: top;
     text-transform: uppercase;
 }
-.status-registration .status {
+.phase-registration .phase {
     background-color: #2f2;
 }
-.status-started .status {
+.phase-started .phase {
     background-color: #fc0;
 }
-.status-finished .status {
+.phase-finished .phase {
     background-color: #f22;
 }
 
@@ -107,6 +104,13 @@ body.page-live main {
     max-width: 100%;
     box-sizing: border-box;
 }
+body.page-live main > header {
+    padding: 0 .5em;
+    flex-basis: 100%;
+}
+body.page-live main > section {
+    margin: .5em;
+}
 body.page-live p, ul {
     margin: .25em 0;
 }
diff --git a/templates/live.html b/templates/live.html
index dab56a5..ae8a4cf 100644
--- a/templates/live.html
+++ b/templates/live.html
@@ -10,37 +10,40 @@
                 <img class="ext-link" src="{{ url_for('static', filename='images/font-awesome/external-link-alt.svg') }}?t=20180415"></a></h1>
     </header>
     <main>
-    {% for e in events %}
-    <section class="status-{{ e['status']|lower }}">
+    {% for e in events | sort(attribute='time') %}
+    {% if loop.changed(e.time.date()) %}
+    <header><h2>{{ e.time | strftime(time=False) }}</h2></header>
+    {% endif %}
+    <section class="phase-{{ e.phase|lower }}">
     <header>
-        <div class="status">{{ e['status']|lower }}</div>
+        <div class="phase">{{ e.phase|lower }}</div>
         <div class="event-actions">
             <a href="{{ e.url }}" target="_blank">
                 <img class="ext-link" src="{{ url_for('static', filename='images/font-awesome/external-link-alt.svg') }}?t=20180415"></a>
         </div>
         <div class="name">
-            <a name="{{ e['name'] }}" href="#{{ e['name'] }}">{{ e['name'] }}</a>
+            <a name="{{ e.name }}" href="#{{ e.name }}">{{ e.name }}</a>
         </div>
-        <div class="time">{{ e['time'] }}</div>
+        <div class="time">{{ e.time | strftime(date=False) }}</div>
     </header>
-    <p><span class="number">{{ e['fencers_checked_in'] | length }}</span> of <span class="number">{{ e['fencers'] | length }}</span> fencer(s) checked in.</p>
-    {% if e['previously_fenced'] %}
-    <p><span class="number">{{ e['previous_total'] }}</span> fenced in prior events:</p>
+    <p><span class="number">{{ e.stats.fencers_checked_in | length }}</span> of <span class="number">{{ e.fencers | length }}</span> fencer(s) checked in.</p>
+    {% if e.stats.previously_fenced %}
+    <p><span class="number">{{ e.stats.previous_total }}</span> fenced in prior events:</p>
     <ul>
-    {% for pe in e['previously_fenced'] %}
-        <li>{{ pe }} - {{ e['previously_fenced'][pe] }}</li>
+    {% for pe in e.stats.previously_fenced %}
+        <li>{{ pe }} - {{ e.stats.previously_fenced[pe] }}</li>
     {% endfor %}
     </ul>
     {% endif %}
-    {% if e['new_fencers_not_checked_in'] %}
-    <p class="hl">{{ e['new_fencers_not_checked_in'] | length }} new and not checked in</p>
+    {% if e.stats.new_fencers_not_checked_in %}
+    <p class="hl">{{ e.stats.new_fencers_not_checked_in | length }} new and not checked in</p>
     {% endif %}
     </section>
     {% endfor %}
     </main>
     <footer>
     {% if tournament.updated %}
-        <div class="updated">Last updated: <span class="date">{{ tournament.updated }}</span></div>
+        <div class="updated">Last updated: <span class="date">{{ tournament.updated | strftime }}</span></div>
     {% endif %}
     </footer>
 {% endblock content %}
-- 
GitLab