Skip to content
Snippets Groups Projects
Commit c1541bda authored by Anton Sarukhanov's avatar Anton Sarukhanov
Browse files

Add support for Fencing Time Live.

parent ed4197e0
No related branches found
No related tags found
No related merge requests found
Pipeline #105 passed with stage
in 43 seconds
......@@ -2,10 +2,10 @@
from flask import Flask, render_template, redirect, request, url_for
from flask_caching import Cache
from scraper import FTPScraper
from scraper import FTPScraper, FTLiveScraper
import models
# pylint: disable=invalid-name
# pylint: disable=invalid-name ; These module-level variables are standard for Flask.
app = Flask(__name__)
cache = Cache(app, config={'CACHE_TYPE': 'simple'})
......@@ -14,6 +14,13 @@ DISPLAY_DATE_FORMAT = '%A, %B %-d, %Y'
DISPLAY_TIME_FORMAT = '%-I:%M %p on %A'
def _make_cache_key():
    """Build the Flask-Caching key for the current request.

    The key is the request path followed by the hash of the set of query
    argument pairs, encoded as UTF-8 bytes. Because the pairs are hashed as
    a frozenset, the order of the arguments in the URL does not matter.
    """
    query_fingerprint = hash(frozenset(request.args.items()))
    return '{}{}'.format(request.path, query_fingerprint).encode('utf-8')
@app.after_request
def add_header(response):
"""Add an HTTP response header for cache invalidation."""
......@@ -23,36 +30,36 @@ def add_header(response):
@app.template_filter('strftime')
def _jinja2_filter_datetime(datetime, date=True, time=True):
    """Format a DateTime for display to a user.

    The `date` and `time` flags choose which parts are shown: both gives
    the combined format, one of them gives the date-only or time-only
    format, and neither gives an empty format string.
    """
    if date and time:
        display_format = DISPLAY_DATETIME_FORMAT
    elif date:
        display_format = DISPLAY_DATE_FORMAT
    elif time:
        display_format = DISPLAY_TIME_FORMAT
    else:
        display_format = ''
    return datetime.strftime(display_format)
@app.route("/")
@cache.cached(timeout=300)
def index():
    """Render the app landing page.

    The page is given the tournaments returned by
    ``FTLiveScraper.list_tournaments()`` so the visitor can pick one.
    The rendered response is cached for 300 seconds.
    """
    # The tournament list must be fetched before the single return; an
    # earlier bare `return render_template('index.html')` would make the
    # listing unreachable.
    ftl_scraper = FTLiveScraper()
    return render_template('index.html',
                           tournaments=ftl_scraper.list_tournaments())
@app.route("/live")
@cache.cached(timeout=300, key_prefix=_make_cache_key)
def live():
    """Render the primary view of live tournament stats.

    The tournament is selected by query argument:

    * ``ftl_id`` - scrape the tournament with that id through
      ``FTLiveScraper`` (takes precedence when both are given);
    * ``results_url`` - scrape that results page through ``FTPScraper``.

    With neither argument the visitor is redirected to the landing page.
    Responses are cached for 300 seconds, keyed by ``_make_cache_key``.
    """
    results_url = request.args.get('results_url')
    ftl_id = request.args.get('ftl_id')
    if ftl_id:
        tournament = FTLiveScraper().scrape_tournament(tournament_id=ftl_id)
    elif results_url:
        # NOTE(review): results_url comes straight from the query string and
        # is fetched server-side by FTPScraper - consider restricting the
        # allowed hosts/schemes to avoid server-side request forgery.
        tournament = FTPScraper(results_url).scrape_tournament()
    else:
        return redirect(url_for('index'))
    # The template reads events from tournament.events, so only the
    # tournament itself needs to be passed.
    return render_template('live.html', tournament=tournament, phases=models.EventPhase)
if __name__ == "__main__":
......
......@@ -6,7 +6,7 @@ from datetime import datetime
from typing import List
from enum import Enum
# A fencer entered in an event. `ftl_id` is optional and defaults to None.
Fencer = namedtuple('Fencer', 'name is_checked_in ftl_id', defaults=[None])


def _fencer_eq(fencer, other):
    """Treat two fencers as equal when their names match."""
    try:
        return fencer.name == other.name
    except AttributeError:
        # `other` is not fencer-like; let Python try the reflected comparison.
        return NotImplemented


def _fencer_ne(fencer, other):
    """Negate _fencer_eq so that `!=` agrees with `==`."""
    result = _fencer_eq(fencer, other)
    return result if result is NotImplemented else not result


def _fencer_hash(fencer):
    """Hash by name so that equal fencers land in the same set/dict slot."""
    return hash(fencer.name)


# Overriding only __eq__ is not enough on a tuple subclass: tuple supplies its
# own __ne__ and __hash__, which would still compare and hash every field.
setattr(Fencer, '__eq__', _fencer_eq)
setattr(Fencer, '__ne__', _fencer_ne)
setattr(Fencer, '__hash__', _fencer_hash)
......@@ -16,7 +16,9 @@ class Tournament:
name: str
url: str
updated: datetime
ftl_id: str = None # type: ignore
updated: datetime = None # type: ignore
location: str = None # type: ignore
events: List['Event'] = field(default_factory=list)
def count_fencers(self):
......@@ -47,7 +49,7 @@ class EventStatistics:
@dataclass
class Event:
class Event: # pylint: disable=too-many-instance-attributes
"""A single event in a tournament (e.g. Y12 Mens Foil)."""
name: str
......@@ -55,6 +57,7 @@ class Event:
time: datetime
tournament: Tournament
phase: EventPhase
ftl_id: str = None # type: ignore
stats: EventStatistics = field(default_factory=EventStatistics)
fencers: List[Fencer] = field(default_factory=list)
......
......@@ -2,14 +2,24 @@
import asyncio
from concurrent.futures import ThreadPoolExecutor
from datetime import datetime
from urllib.parse import urlparse, urljoin
from lxml import html # nosec Bandit suggests defusedxml but defusedxml.lxml is dead
from datetime import date, datetime, timedelta
from urllib.parse import urlparse, urljoin, urlencode
from lxml import html # nosec ; Bandit suggests defusedxml but defusedxml.lxml is dead
import requests
from models import Event, EventPhase, Fencer, Tournament
# pylint: disable=too-few-public-methods ; I'm ok with that.
class FTPScraper:
class Scraper:
    """Common base class for the tournament scrapers."""

    def __init__(self):
        """Start with no tournament; subclasses fill it in when scraping."""
        self.tournament = None
class FTPScraper(Scraper):
"""Scraper for tournaments hosted on an FTP server.
This reads the original Fencing Time results pages, hosted by individual
......@@ -22,18 +32,20 @@ class FTPScraper:
def __init__(self, tournament_url):
    """Set up the scraper instance.

    `tournament_url` is the address of the tournament results page. It is
    stored as given and fetched later by scrape_tournament().
    """
    # The base initialiser sets self.tournament to None, so it is not
    # repeated here.
    super(FTPScraper, self).__init__()
    self.tournament_url = tournament_url
def scrape(self):
def scrape_tournament(self):
"""Get all tournament information."""
try:
results = requests.get(self.tournament_url)
except requests.exceptions.MissingSchema:
results = requests.get("http://{}".format(self.tournament_url))
results_tree = html.fromstring(results.content)
tournament_etree = html.fromstring(results.content)
try:
tournament_name = results_tree.xpath('//span[@class="tournName"]/text()')[0]
updated_str = (results_tree.xpath('//span[@class="lastUpdate"]/text()')[0]
tournament_name = tournament_etree.xpath(
'//span[@class="tournName"]/text()')[0]
updated_str = (tournament_etree.xpath(
'//span[@class="lastUpdate"]/text()')[0]
.replace('Last Updated:', '').strip())
updated = datetime.strptime(updated_str, self.UPDATED_DATETIME_FORMAT)
except IndexError:
......@@ -44,7 +56,7 @@ class FTPScraper:
# Get tournament events
try:
event_urls = results_tree.xpath(
event_urls = tournament_etree.xpath(
'//div[@id="schedule"]/table/tr/td/a[text()="View"]/@href')
except IndexError:
raise ScrapeError("No event schedule found.")
......@@ -66,12 +78,12 @@ class FTPScraper:
futures.append(loop.run_in_executor(executor, requests.get, event_url))
for response in await asyncio.gather(*futures):
event = self.parse_event(response)
event = self._parse_event(response)
self.tournament.events.append(event)
self.tournament.count_fencers()
def parse_event(self, event):
def _parse_event(self, event):
"""Extract useful strings from the event info."""
event_tree = html.fromstring(event.content)
event_details = event_tree.xpath('//span[@class="tournDetails"]/text()')
......@@ -106,12 +118,113 @@ class FTPScraper:
url=event.url, fencers=fencers, tournament=self.tournament)
class FTLiveScraper(FTPScraper):
class FTLiveScraper(Scraper):
    """Scraper for tournaments hosted on fencingtimelive.com.

    This reads the newer-style pages, centrally hosted by Fencing Time.
    """

    BASE_URL = 'https://fencingtimelive.com'
    TOURNAMENTS_URL = urljoin(BASE_URL, 'tournaments/list/data?{query}')
    TOURNAMENT_URL = urljoin(BASE_URL, 'tournaments/eventSchedule/{tournament_id}')
    FENCERS_URL = urljoin(BASE_URL, 'events/competitors/data/{event_id}')
    EVENT_URL = urljoin(BASE_URL, 'events/view/{event_id}')
    # Format of the 'start' field in the tournament list JSON.
    START_FORMAT = '%Y-%m-%dT%H:%M:%S.000Z'
    # Format of "<date heading> <time cell>" on the event schedule page.
    EVENT_DATETIME_FORMAT = '%A %B %d, %Y %I:%M %p'
    # Default search window around today, used when no filter is given.
    MAX_AGO = timedelta(days=7)
    MAX_AHEAD = timedelta(days=7)

    def list_tournaments(self, search=None, from_date=None, to_date=None):
        """Get a list of tournaments in FTLive.

        With no arguments the search covers MAX_AGO before today through
        MAX_AHEAD after today. Otherwise the given name filter and/or date
        bounds are sent as they are, with missing ones left blank.

        Returns a list of dicts with the keys 'start' (a datetime parsed
        with START_FORMAT), 'id', 'name' and 'location'.
        """
        if not search and not from_date and not to_date:
            today = date.today()
            from_date = today - self.MAX_AGO
            to_date = today + self.MAX_AHEAD
        args = {
            'tname': search or '',
            'from': from_date or '',
            'to': to_date or '',
        }
        url = self.TOURNAMENTS_URL.format(query=urlencode(args))
        tournaments = requests.get(url).json()
        return [{'start': datetime.strptime(t['start'], self.START_FORMAT),
                 'id': t['id'],
                 'name': t['name'],
                 'location': t['location']}
                for t in tournaments]

    def scrape_tournament(self, tournament_id):
        """Get all tournament information.

        Fetches the event schedule page for `tournament_id`, builds the
        Tournament with one Event per schedule row, then loads the fencers
        of every event.

        Returns the populated Tournament (also kept on self.tournament).
        Raises ScrapeError when the page has no tournament name or an event
        row cannot be interpreted.
        """
        tournament_url = self.TOURNAMENT_URL.format(tournament_id=tournament_id)
        tournament_html = requests.get(tournament_url).content
        tournament_etree = html.fromstring(tournament_html)
        try:
            tournament_name = tournament_etree.xpath(
                '//div[@class="desktop tournName"]/text()')[0]
        except IndexError as exc:
            raise ScrapeError("Tournament info not found.") from exc
        self.tournament = Tournament(name=tournament_name, url=tournament_url,
                                     ftl_id=tournament_id)

        # Event rows are the <tr> elements whose id matches 'ev_.*'.
        event_data = tournament_etree.xpath(
            "//tr[re:test(@id, 'ev_.*')]",
            namespaces={"re": "http://exslt.org/regular-expressions"})
        for event in event_data:
            self.tournament.events.append(self._parse_event(event))

        # A private loop is created for this call; close it afterwards so
        # repeated scrapes do not leak the loop's resources.
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(self._get_fencers())
        finally:
            loop.close()
        return self.tournament

    async def _get_fencers(self):
        """Load the fencers of every event, then refresh the fencer counts.

        The blocking HTTP requests run in a thread pool so that the events
        are fetched concurrently.
        """
        def get_fencers(event):
            fencers_url = self.FENCERS_URL.format(event_id=event.ftl_id)
            response = requests.get(fencers_url).json()
            # Scratched fencers are left out of the event entirely.
            event.fencers = [
                Fencer(name=f['name'], ftl_id=f['id'],
                       is_checked_in=(f['status'] == 'CheckedIn'))
                for f in response if f['status'] != 'Scratched']
            return event

        with ThreadPoolExecutor(max_workers=20) as executor:
            loop = asyncio.get_running_loop()
            futures = [loop.run_in_executor(executor, get_fencers, event)
                       for event in self.tournament.events]
            await asyncio.gather(*futures)
        self.tournament.count_fencers()

    def _parse_event(self, event_etree):
        """Extract useful strings from the event info.

        `event_etree` is one schedule row: its first cell holds the start
        time and its second cell the event name. The date comes from the
        <h5> heading preceding the table that contains the row.

        Returns an Event. Raises ScrapeError when any expected piece of the
        row is missing or the date/time cannot be parsed.
        """
        # Bound before the try block so the error message below can always
        # be built, even when the name itself is what failed to parse.
        name = None
        try:
            name = event_etree.getchildren()[1].text_content().strip()
            ftl_id = event_etree.attrib['data-href'].split('/')[-1]
            url = self.EVENT_URL.format(event_id=ftl_id)
            time_str = event_etree.getchildren()[0].text_content().strip()
            table = next(event_etree.iterancestors('table'))
            date_str = next(table.itersiblings('h5', preceding=True)).text_content()
            dt_str = '{} {}'.format(date_str, time_str)
            time = datetime.strptime(dt_str, self.EVENT_DATETIME_FORMAT)
            phase = self._get_event_phase(event_etree)
        except (IndexError, KeyError, StopIteration, ValueError, ScrapeError) as exc:
            raise ScrapeError("Failed to interpret live results for event \"{}\". {}"
                              .format(name if name is not None else '<unknown>',
                                      exc)) from exc
        return Event(name=name, url=url, time=time, tournament=self.tournament,
                     phase=phase, ftl_id=ftl_id)

    def _get_event_phase(self, event_etree):  # pylint: disable=no-self-use
        """Determine the state of the event from the text of its third cell."""
        if event_etree.xpath('td[3]/text()[contains(., "Finished")]'):
            return EventPhase.FINISHED
        if event_etree.xpath('td[3]/text()[contains(., "Fencing")]'):
            return EventPhase.STARTED
        # Neither marker present: the event is treated as still registering.
        return EventPhase.REGISTRATION
class ScrapeError(Exception):
......
......@@ -7,40 +7,34 @@
<p>
Welcome! Please select a Live Results URL, or enter your own.
</p>
<p>
<form action="{{ url_for('live') }}" method="get">
<label>Live Results URL: &nbsp;
<select name="results_url" onchange="this.form.submit()">
<option value="" selected disabled>-- Select One --</option>
<optgroup label="Escrime Management">
<option value="http://escrimeresults.com/tournaments/NCAA.html">NCAA</option>
<option value="http://escrimeresults.com/tournaments/Atlantic-Coast-Conference.html">Atlantic Coast Conference</option>
<option value="http://escrimeresults.com/tournaments/Ivy-League.html">Ivy League</option>
<option value="http://escrimeresults.com/cobra/">Cobra</option>
<option value="http://escrimeresults.com/thrust/">Thrust</option>
<option value="http://escrimeresults.com/tournaments/NJSIAA.html">NJSIAA</option>
<option value="http://escrimeresults.com/Candlewood/">Candlewood</option>
<option value="http://escrimeresults.com/tournaments/U-Penn.html">U Penn</option>
<option value="http://escrimeresults.com/tournaments/Temple-University.html">Temple U</option>
<option value="http://escrimeresults.com/Big1/">Big1</option>
<option value="http://escrimeresults.com/tournaments/US-Collegiate-Squad-Championships.html">US Collegiate Squad</option>
<option value="http://escrimeresults.com/MKHS/">MKHS</option>
</optgroup>
<optgroup label="NJ Division">
<option value="http://njfencingresults.org/liveresults/">NJFencingResults.org/liveresults</option>
</optgroup>
</select>
</label>
<input type="submit" value="Go!">
</form>
</p>
<p>
<form action="{{ url_for('live') }}" method="get">
<label>Other URL: &nbsp;
<input name="results_url" placeholder="example.com/liveresults">
</label>
<input type="submit" value="Go!">
</form>
</p>
<section>
<h2>Fencing Time Live</h2>
<p>
<form action="{{ url_for('live') }}" method="get">
<label>Choose a Tournament: &nbsp;
<select name="ftl_id" onchange="this.form.submit()">
<option value="" selected disabled>-- Select One --</option>
{# One <optgroup> per calendar day: compare on the date only (the label shows no time), and close the previous group only when one is actually open. #}
{% for tournament in tournaments | sort(attribute='start') %}
{% if loop.changed(tournament.start.date()) %}{% if not loop.first %}</optgroup>{% endif %}<optgroup label="{{ tournament.start | strftime(time=False) }}">{% endif %}
<option value="{{ tournament['id'] }}">{{ tournament['name'] }} ({{ tournament['location'] }})</option>
{% if loop.last %}</optgroup>{% endif %}
{% endfor %}
</select>
</label>
<input type="submit" value="Go!">
</form>
</p>
</section>
<section>
<h2>FTP Live Results</h2>
<p>
<form action="{{ url_for('live') }}" method="get">
<label>Enter a custom Live Results link: &nbsp;
<input name="results_url" placeholder="example.com/liveresults">
</label>
<input type="submit" value="Go!">
</form>
</p>
</section>
</main>
{% endblock content %}
......@@ -5,12 +5,12 @@
{% block content %}
<header>
<span class="back-to-home"><a href="{{ url_for('index') }}">Back to Home</a></span>
<h1>{{ tournament.name }} - {{ events | length }} events
<h1>{{ tournament.name }} - {{ tournament.events | length }} events
<a href="{{ tournament.url }}" target="_blank">
<img class="ext-link" src="{{ url_for('static', filename='images/font-awesome/external-link-alt.svg') }}?t=20180415"></a></h1>
</header>
<main>
{% for e in events | sort(attribute='time') %}
{% for e in tournament.events | sort(attribute='time') %}
{% if loop.changed(e.time.date()) %}
<header><h2>{{ e.time | strftime(time=False) }}</h2></header>
{% endif %}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment