From c1541bda6ad266e44a3e8d9891f023bb0aaeeab0 Mon Sep 17 00:00:00 2001
From: Anton Sarukhanov <code@ant.sr>
Date: Tue, 3 Dec 2019 22:20:09 -0500
Subject: [PATCH] Add support for Fencing Time Live.

---
 app.py               |  39 +++++++-----
 models.py            |   9 ++-
 scraper.py           | 141 ++++++++++++++++++++++++++++++++++++++-----
 templates/index.html |  64 +++++++++-----------
 templates/live.html  |   4 +-
 5 files changed, 187 insertions(+), 70 deletions(-)

diff --git a/app.py b/app.py
index caa2155..bbea236 100644
--- a/app.py
+++ b/app.py
@@ -2,10 +2,10 @@
 
 from flask import Flask, render_template, redirect, request, url_for
 from flask_caching import Cache
-from scraper import FTPScraper
+from scraper import FTPScraper, FTLiveScraper
 import models
 
-# pylint: disable=invalid-name
+# pylint: disable=invalid-name ; These module-level variables are standard for Flask.
 app = Flask(__name__)
 cache = Cache(app, config={'CACHE_TYPE': 'simple'})
 
@@ -14,6 +14,13 @@ DISPLAY_DATE_FORMAT = '%A, %B %-d, %Y'
 DISPLAY_TIME_FORMAT = '%-I:%M %p on %A'
 
 
+def _make_cache_key():
+    """Create a Flask-Caching key from the request path and its query arguments."""
+    path = request.path
+    args = str(hash(frozenset(request.args.items())))  # frozenset: argument order is ignored
+    return (path + args).encode('utf-8')
+
+
 @app.after_request
 def add_header(response):
     """Add an HTTP response header for cache invalidation."""
@@ -23,36 +30,36 @@ def add_header(response):
 
 @app.template_filter('strftime')
 def _jinja2_filter_datetime(datetime, date=True, time=True):
+    """Format a DateTime for display to a user."""
     return datetime.strftime(DISPLAY_DATETIME_FORMAT if date and time
                              else DISPLAY_DATE_FORMAT if date
                              else DISPLAY_TIME_FORMAT if time else '')
 
 
 @app.route("/")
+@cache.cached(timeout=300)
 def index():
     """Render the app landing page."""
-    return render_template('index.html')
-
-
-def _make_cache_key():
-    """Create a cache key for Flask-Caching."""
-    path = request.path
-    args = str(hash(frozenset(request.args.items())))
-    return (path + args).encode('utf-8')
+    ftl_scraper = FTLiveScraper()
+    return render_template('index.html',
+                           tournaments=ftl_scraper.list_tournaments())
 
 
 @app.route("/live")
 @cache.cached(timeout=300, key_prefix=_make_cache_key)
-def live(results_url=None):
+def live():
     """Render the primary view of live tournament stats."""
     results_url = request.args.get('results_url')
-    if not results_url:
+    ftl_id = request.args.get('ftl_id')
+
+    if ftl_id:
+        tournament = FTLiveScraper().scrape_tournament(tournament_id=ftl_id)
+    elif results_url:
+        tournament = FTPScraper(results_url).scrape_tournament()
+    else:
         return redirect(url_for('index'))
 
-    scraper = FTPScraper(results_url)
-    tournament = scraper.scrape()
-    return render_template('live.html', tournament=tournament, events=tournament.events,
-                           phases=models.EventPhase)
+    return render_template('live.html', tournament=tournament, phases=models.EventPhase)
 
 
 if __name__ == "__main__":
diff --git a/models.py b/models.py
index cb7a4a2..3dbf3a1 100644
--- a/models.py
+++ b/models.py
@@ -6,7 +6,7 @@ from datetime import datetime
 from typing import List
 from enum import Enum
 
-Fencer = namedtuple('Fencer', 'name is_checked_in')
+Fencer = namedtuple('Fencer', 'name is_checked_in ftl_id', defaults=[None])
 setattr(Fencer, '__eq__', lambda f1, f2: f1.name == f2.name)
 
 
@@ -16,7 +16,9 @@ class Tournament:
 
     name: str
     url: str
-    updated: datetime
+    ftl_id: str = None  # type: ignore
+    updated: datetime = None  # type: ignore
+    location: str = None  # type: ignore
     events: List['Event'] = field(default_factory=list)
 
     def count_fencers(self):
@@ -47,7 +49,7 @@ class EventStatistics:
 
 
 @dataclass
-class Event:
+class Event:  # pylint: disable=too-many-instance-attributes
     """A single event in a tournament (e.g. Y12 Mens Foil)."""
 
     name: str
@@ -55,6 +57,7 @@ class Event:
     time: datetime
     tournament: Tournament
     phase: EventPhase
+    ftl_id: str = None  # type: ignore
     stats: EventStatistics = field(default_factory=EventStatistics)
     fencers: List[Fencer] = field(default_factory=list)
 
diff --git a/scraper.py b/scraper.py
index 05a4475..37f9e59 100644
--- a/scraper.py
+++ b/scraper.py
@@ -2,14 +2,24 @@
 
 import asyncio
 from concurrent.futures import ThreadPoolExecutor
-from datetime import datetime
-from urllib.parse import urlparse, urljoin
-from lxml import html  # nosec Bandit suggests defusedxml but defusedxml.lxml is dead
+from datetime import date, datetime, timedelta
+from urllib.parse import urlparse, urljoin, urlencode
+from lxml import html  # nosec ; Bandit suggests defusedxml but defusedxml.lxml is dead
 import requests
 from models import Event, EventPhase, Fencer, Tournament
 
+# pylint: disable=too-few-public-methods ; I'm ok with that.
 
-class FTPScraper:
+
+class Scraper:
+    """Base class shared by the tournament scrapers in this module."""
+
+    def __init__(self):
+        """Initialize common state; no tournament has been scraped yet."""
+        self.tournament = None
+
+
+class FTPScraper(Scraper):
     """Scraper for tournaments hosted on an FTP server.
 
     This reads the original Fencing Time results pages, hosted by individual
@@ -22,18 +32,20 @@ class FTPScraper:
     def __init__(self, tournament_url):
         """Set up the scraper instance."""
         self.tournament_url = tournament_url
-        self.tournament = None
+        super(FTPScraper, self).__init__()
 
-    def scrape(self):
+    def scrape_tournament(self):
         """Get all tournament information."""
         try:
             results = requests.get(self.tournament_url)
         except requests.exceptions.MissingSchema:
             results = requests.get("http://{}".format(self.tournament_url))
-        results_tree = html.fromstring(results.content)
+        tournament_etree = html.fromstring(results.content)
         try:
-            tournament_name = results_tree.xpath('//span[@class="tournName"]/text()')[0]
-            updated_str = (results_tree.xpath('//span[@class="lastUpdate"]/text()')[0]
+            tournament_name = tournament_etree.xpath(
+                '//span[@class="tournName"]/text()')[0]
+            updated_str = (tournament_etree.xpath(
+                '//span[@class="lastUpdate"]/text()')[0]
                            .replace('Last Updated:', '').strip())
             updated = datetime.strptime(updated_str, self.UPDATED_DATETIME_FORMAT)
         except IndexError:
@@ -44,7 +56,7 @@ class FTPScraper:
 
         # Get tournament events
         try:
-            event_urls = results_tree.xpath(
+            event_urls = tournament_etree.xpath(
                 '//div[@id="schedule"]/table/tr/td/a[text()="View"]/@href')
         except IndexError:
             raise ScrapeError("No event schedule found.")
@@ -66,12 +78,12 @@ class FTPScraper:
                 futures.append(loop.run_in_executor(executor, requests.get, event_url))
 
             for response in await asyncio.gather(*futures):
-                event = self.parse_event(response)
+                event = self._parse_event(response)
                 self.tournament.events.append(event)
 
             self.tournament.count_fencers()
 
-    def parse_event(self, event):
+    def _parse_event(self, event):
         """Extract useful strings from the event info."""
         event_tree = html.fromstring(event.content)
         event_details = event_tree.xpath('//span[@class="tournDetails"]/text()')
@@ -106,12 +118,113 @@ class FTPScraper:
                      url=event.url, fencers=fencers, tournament=self.tournament)
 
 
-class FTLiveScraper(FTPScraper):
+class FTLiveScraper(Scraper):
     """Scraper for tournaments hosted on fencingtimelive.com.
 
     This reads the newer-style pages, centrally hosted by Fencing Time.
     """
-    # to do...
+
+    BASE_URL = 'https://fencingtimelive.com'
+    TOURNAMENTS_URL = urljoin(BASE_URL, 'tournaments/list/data?{query}')
+    TOURNAMENT_URL = urljoin(BASE_URL, 'tournaments/eventSchedule/{tournament_id}')
+    FENCERS_URL = urljoin(BASE_URL, 'events/competitors/data/{event_id}')
+    EVENT_URL = urljoin(BASE_URL, 'events/view/{event_id}')
+    START_FORMAT = '%Y-%m-%dT%H:%M:%S.000Z'
+    EVENT_DATETIME_FORMAT = '%A %B %d, %Y %I:%M %p'
+    MAX_AGO = timedelta(days=7)
+    MAX_AHEAD = timedelta(days=7)
+
+    def list_tournaments(self, search=None, from_date=None, to_date=None):
+        """Return FTLive tournaments as dicts with start, id, name and location keys."""
+        if not search and not from_date and not to_date:
+            from_date = date.today() - self.MAX_AGO  # No filters given: default to a
+            to_date = date.today() + self.MAX_AHEAD  # window around today.
+        args = {
+            'tname': search or '',
+            'from': from_date or '',
+            'to': to_date or ''
+        }
+        url = self.TOURNAMENTS_URL.format(query=urlencode(args))
+        tournaments = requests.get(url).json()
+        return [{'start': datetime.strptime(t['start'], self.START_FORMAT),
+                 'id': t['id'],
+                 'name': t['name'],
+                 'location': t['location']}
+                for t in tournaments]
+
+    def scrape_tournament(self, tournament_id):
+        """Scrape one FTLive tournament by id; raises ScrapeError if the page is unusable."""
+        tournament_url = self.TOURNAMENT_URL.format(tournament_id=tournament_id)
+        tournament_html = requests.get(tournament_url).content
+        tournament_etree = html.fromstring(tournament_html)
+        try:
+            tournament_name = tournament_etree.xpath(
+                '//div[@class="desktop tournName"]/text()')[0]
+        except IndexError:
+            raise ScrapeError("Tournament info not found.")
+        self.tournament = Tournament(name=tournament_name, url=tournament_url,
+                                     ftl_id=tournament_id)
+
+        event_data = tournament_etree.xpath(
+            "//tr[re:test(@id, 'ev_.*')]",
+            namespaces={"re": "http://exslt.org/regular-expressions"})
+        self.tournament.events.extend(self._parse_event(row) for row in event_data)
+
+        loop = asyncio.new_event_loop()
+        try:
+            loop.run_until_complete(self._get_fencers())
+        finally:
+            loop.close()  # A loop is created per scrape, so always release its resources.
+        return self.tournament
+
+    async def _get_fencers(self):
+        """Fetch every event's fencer list in a thread pool, then call count_fencers()."""
+        def get_fencers(event):
+            fencers_url = self.FENCERS_URL.format(event_id=event.ftl_id)
+            response = requests.get(fencers_url).json()
+            event.fencers = [
+                Fencer(name=f['name'], ftl_id=f['id'],
+                       is_checked_in=(f['status'] == 'CheckedIn'))
+                for f in response if f['status'] != 'Scratched']  # drop scratched fencers
+            return event
+
+        with ThreadPoolExecutor(max_workers=20) as executor:
+            loop = asyncio.get_event_loop()
+            futures = []
+
+            for event in self.tournament.events:
+                futures.append(loop.run_in_executor(executor, get_fencers, event))
+
+            await asyncio.gather(*futures)
+
+            self.tournament.count_fencers()
+
+    def _parse_event(self, event_etree):
+        """Parse one schedule row into an Event; raises ScrapeError if it is malformed."""
+        name = 'unknown'  # Bound before the try so the error message can always use it.
+        try:
+            name = event_etree[1].text_content().strip()
+            ftl_id = event_etree.attrib['data-href'].split('/')[-1]
+            table = next(event_etree.iterancestors('table'))
+            date_str = next(table.itersiblings('h5', preceding=True)).text_content()
+            dt_str = '{} {}'.format(date_str, event_etree[0].text_content().strip())
+            time = datetime.strptime(dt_str, self.EVENT_DATETIME_FORMAT)
+            return Event(name=name, url=self.EVENT_URL.format(event_id=ftl_id),
+                         time=time, tournament=self.tournament, ftl_id=ftl_id,
+                         phase=self._get_event_phase(event_etree))
+        except (IndexError, KeyError, StopIteration, ValueError, ScrapeError) as exc:
+            raise ScrapeError("Failed to interpret live results for event \"{}\". {}"
+                              .format(name, exc)) from exc
+
+    def _get_event_phase(self, event_etree):  # pylint: disable=no-self-use
+        """Classify the event from the status text in the row's third cell."""
+        if event_etree.xpath('td[3]/text()[contains(., "Finished")]'):
+            return EventPhase.FINISHED
+        if event_etree.xpath('td[3]/text()[contains(., "Fencing")]'):
+            return EventPhase.STARTED
+        # Neither status marker is present, so fall back to the
+        # registration phase.
+        return EventPhase.REGISTRATION
 
 
 class ScrapeError(Exception):
diff --git a/templates/index.html b/templates/index.html
index 4853d48..8561459 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -7,40 +7,34 @@
         <p>
             Welcome! Please select a Live Results URL, or enter your own.
         </p>
-        <p>
-            <form action="{{ url_for('live') }}" method="get">
-                <label>Live Results URL: &nbsp;
-                    <select name="results_url" onchange="this.form.submit()">
-                        <option value="" selected disabled>-- Select One --</option>
-                        <optgroup label="Escrime Management">
-                            <option value="http://escrimeresults.com/tournaments/NCAA.html">NCAA</option>
-                            <option value="http://escrimeresults.com/tournaments/Atlantic-Coast-Conference.html">Atlantic Coast Conference</option>
-                            <option value="http://escrimeresults.com/tournaments/Ivy-League.html">Ivy League</option>
-                            <option value="http://escrimeresults.com/cobra/">Cobra</option>
-                            <option value="http://escrimeresults.com/thrust/">Thrust</option>
-                            <option value="http://escrimeresults.com/tournaments/NJSIAA.html">NJSIAA</option>
-                            <option value="http://escrimeresults.com/Candlewood/">Candlewood</option>
-                            <option value="http://escrimeresults.com/tournaments/U-Penn.html">U Penn</option>
-                            <option value="http://escrimeresults.com/tournaments/Temple-University.html">Temple U</option>
-                            <option value="http://escrimeresults.com/Big1/">Big1</option>
-                            <option value="http://escrimeresults.com/tournaments/US-Collegiate-Squad-Championships.html">US Collegiate Squad</option>
-                            <option value="http://escrimeresults.com/MKHS/">MKHS</option>
-                        </optgroup>
-                        <optgroup label="NJ Division">
-                            <option value="http://njfencingresults.org/liveresults/">NJFencingResults.org/liveresults</option>
-                        </optgroup>
-                    </select>
-                </label>
-                <input type="submit" value="Go!">
-            </form>
-        </p>
-        <p>
-            <form action="{{ url_for('live') }}" method="get">
-                <label>Other URL: &nbsp;
-                    <input name="results_url" placeholder="example.com/liveresults">
-                </label>
-                <input type="submit" value="Go!">
-            </form>
-        </p>
+        <section>
+            <h2>Fencing Time Live</h2>
+            <p>
+                <form action="{{ url_for('live') }}" method="get">
+                    <label>Choose a Tournament: &nbsp;
+                        <select name="ftl_id" onchange="this.form.submit()">
+                            <option value="" selected disabled>-- Select One --</option>
+                            {% for tournament in tournaments | sort(attribute='start') %}{# one optgroup per start date #}
+                                {% if loop.changed(tournament.start.date()) %}{% if not loop.first %}</optgroup>{% endif %}<optgroup label="{{ tournament.start | strftime(time=False) }}">{% endif %}
+                                <option value="{{ tournament['id'] }}">{{ tournament['name'] }} ({{ tournament['location'] }})</option>
+                                {% if loop.last %}</optgroup>{% endif %}
+                            {% endfor %}
+                        </select>
+                    </label>
+                    <input type="submit" value="Go!">
+                </form>
+            </p>
+        </section>
+        <section>
+            <h2>FTP Live Results</h2>
+            <p>
+                <form action="{{ url_for('live') }}" method="get">
+                    <label>Enter a custom Live Results link: &nbsp;
+                        <input name="results_url" placeholder="example.com/liveresults">
+                    </label>
+                    <input type="submit" value="Go!">
+                </form>
+            </p>
+        </section>
     </main>
 {% endblock content %}
diff --git a/templates/live.html b/templates/live.html
index ae8a4cf..b55e483 100644
--- a/templates/live.html
+++ b/templates/live.html
@@ -5,12 +5,12 @@
 {% block content %}
     <header>
         <span class="back-to-home"><a href="{{ url_for('index') }}">Back to Home</a></span>
-        <h1>{{ tournament.name }} - {{ events | length }} events
+        <h1>{{ tournament.name }} - {{ tournament.events | length }} events
             <a href="{{ tournament.url }}" target="_blank">
                 <img class="ext-link" src="{{ url_for('static', filename='images/font-awesome/external-link-alt.svg') }}?t=20180415"></a></h1>
     </header>
     <main>
-    {% for e in events | sort(attribute='time') %}
+    {% for e in tournament.events | sort(attribute='time') %}
     {% if loop.changed(e.time.date()) %}
     <header><h2>{{ e.time | strftime(time=False) }}</h2></header>
     {% endif %}
-- 
GitLab