diff --git a/app.py b/app.py
index 516956dfc414fa22a069f6e1ece4bc4831385152..4ec13b2e436f3af757189e567bce9eade6393820 100644
--- a/app.py
+++ b/app.py
@@ -1,39 +1,41 @@
-from urllib.parse import urlparse, urljoin
-import re
 from flask import Flask, render_template, redirect, request, url_for
 from flask_caching import Cache
-from lxml import html
-import requests
-from scrape import scrape
+from scraper import Scraper
 
 app = Flask(__name__)
 cache = Cache(app, config={'CACHE_TYPE': 'simple'})
 
+
 def make_cache_key(*args, **kwargs):
     path = request.path
     args = str(hash(frozenset(request.args.items())))
     return (path + args).encode('utf-8')
 
+
 @app.after_request
 def add_header(response):
     response.cache_control.max_age = 300
     return response
 
+
 @app.route("/")
 def index():
     return render_template('index.html')
 
+
 @app.route("/live")
 @cache.cached(timeout=300, key_prefix=make_cache_key)
 def live(results_url=None):
     results_url = request.args.get('results_url')
     if not results_url:
         return redirect(url_for('index'))
-    tournament_name, tournament_details, events = scrape(results_url)
+
+    scraper = Scraper(results_url)
+    tournament = scraper.scrape()
     return render_template('live.html',
-        tournament_name = tournament_name,
-        tournament_details = tournament_details,
-        events = events)
+        tournament_name=tournament.name,
+        events=tournament.events)
 
+
 if __name__ == "__main__":
     app.run()
diff --git a/models.py b/models.py
new file mode 100644
index 0000000000000000000000000000000000000000..de9651e7401c9678515c5c6d08ad8cb1402279a3
--- /dev/null
+++ b/models.py
@@ -0,0 +1,104 @@
+"""Domain-specific class definitions."""
+
+
+class Tournament:
+    """A tournament and the events scraped from its results page."""
+
+    def __init__(self, name, url, events=None):
+        self.name = name
+        self.url = url
+        self.events = events or []
+
+    def add_event(self, event):
+        """Append an event and point it back at this tournament."""
+        self.events.append(event)
+        event.tournament = self
+
+    def count_all_fencers(self):
+        """Recompute the fencer counts of every event."""
+        for event in self.events:
+            event.count_fencers()
+
+
+class EventStatus:
+    """A named event status; its repr is the bare name."""
+
+    def __init__(self, name):
+        self.name = name
+
+    def __repr__(self):
+        return self.name
+
+
+class Event:
+    """A single event of a tournament together with its fencers."""
+
+    STATUS_REGISTRATION = EventStatus("Registration")
+    STATUS_STARTED = EventStatus("Started")
+    STATUS_FINISHED = EventStatus("Finished")
+
+    def __init__(self, name, time, status, fencers, tournament=None):
+        self.name = name
+        self.time = time
+        self.status = status
+        self.fencers = fencers
+        self.tournament = tournament
+
+    def __repr__(self):
+        return self.name
+
+    def count_fencers(self):
+        """Count the fencers in an event.
+
+        Result is divided by status (checked in, not checked in, etc.).
+        A fencer who also appears in an event listed before this one is
+        tallied under that event's name in previously_fenced (first
+        match only) and dropped from new_fencers_not_checked_in.
+        """
+        self.fencers_checked_in = []
+        self.new_fencers_not_checked_in = []
+        self.previously_fenced = {}
+        self.previous_total = 0
+        # An event not yet attached to a tournament has no earlier events
+        events = [] if self.tournament is None else self.tournament.events
+        for fencer in self.fencers:
+            if fencer.is_checked_in:
+                self.fencers_checked_in.append(fencer)
+            else:
+                self.new_fencers_not_checked_in.append(fencer)
+            for event in events:
+                if event.name == self.name:
+                    break  # reached this event; later ones don't count
+                if fencer in event.fencers:
+                    if event.name in self.previously_fenced:
+                        self.previously_fenced[event.name] += 1
+                    else:
+                        self.previously_fenced[event.name] = 1
+                    self.previous_total += 1
+                    try:
+                        self.new_fencers_not_checked_in.remove(fencer)
+                    except ValueError:
+                        pass  # already removed; ignore
+                    break
+
+
+class Fencer:
+    """A fencer entered in an event, identified by stripped name."""
+
+    def __init__(self, name, is_checked_in):
+        self.name = name.strip()
+        self.is_checked_in = is_checked_in
+
+    def __repr__(self):
+        return '<Fencer name="{}" is_checked_in="{}">'.format(
+            self.name, self.is_checked_in)
+
+    def __eq__(self, other):
+        # Fencers are compared by name only; defer on foreign types
+        if not isinstance(other, Fencer):
+            return NotImplemented
+        return self.name == other.name
+
+    def __hash__(self):
+        # Defining __eq__ disables the default hash; keep it consistent
+        return hash(self.name)
diff --git a/scrape.py b/scrape.py
deleted file mode 100644
index 1b145775edcd38b29a280e5db1f89192d0ceee72..0000000000000000000000000000000000000000
--- a/scrape.py
+++ /dev/null
@@ -1,88 +0,0 @@
-from lxml import html
-import requests
-import re
-from urllib.parse import urlparse, urljoin
-from itertools import repeat
-
-
-def scrape(results_url):
-    try:
-        results = requests.get(results_url)
-    except requests.exceptions.MissingSchema:
-        results = requests.get("http://{}".format(results_url))
-    results_tree = html.fromstring(results.content)
-    try:
-        event_urls = results_tree.xpath(
-            '//div[@id="schedule"]/table/tr/td/a[text()="View"]/@href')
-    except IndexError:
-        return "No event schedule found."
-    try:
-        tournament_name = results_tree.xpath(
-            '//span[@class="tournName"]/text()')[0]
-        tournament_details = results_tree.xpath(
-            '//span[@class="tournDetails"]/text()')[0]
-    except IndexError:
-        return "Tournament info not found."
-    events = []
-    for event_url in event_urls:
-        if not urlparse(event_url).netloc:
-            event_url = urljoin(results.url, event_url)
-        event = requests.get(event_url)
-        event_tree = html.fromstring(event.content)
-        event_details = event_tree.xpath(
-            '//span[@class="tournDetails"]/text()')
-        try:
-            event_name = event_details[0]
-            event_time = event_details[1]
-        except:
-            raise(Exception("Failed to interpret live results for event \"{}\".".format(event_url)))
-        if event_tree.xpath('//a[text()="Final Results"]'):
-            fencers = event_tree.xpath('//div[@id="finalResults"]/table/tr/td[2]/text()')
-            fencers = dict(zip(fencers, repeat("Checked In")))
-            event_status = "closed"
-        elif event_tree.xpath('//a[text()="Seeding"]'):
-            fencers = event_tree.xpath('//div[@id="Round1Seeding"]/table/tr/td[2]/text()')
-            fencers = dict(zip(fencers, repeat("Checked In")))
-            event_status = "ongoing"
-        elif event_tree.xpath('//a[text()="Check-In Status"]'):
-            event_status = "open"
-            fencers_checked_in = [True if len(list(f)) else False for f in event_tree.xpath('//div[@id="checkIn"]/table/tr/td[1]')]
-            fencers = event_tree.xpath('//div[@id="checkIn"]/table/tr/td[2]/text()')
-            fencers = dict(zip(fencers, fencers_checked_in))
-        try:
-            del this_event
-        except:
-            pass # not yet set, oh well
-        this_event = {
-            'name': event_name,
-            'time': event_time,
-            'status': event_status,
-            'fencers': [],
-            'fencers_checked_in': [],
-            'new_fencers_not_checked_in': [],
-            'previously_fenced': {},
-            'previous_total': 0
-        }
-        for fencer, is_checked_in in fencers.items():
-            fencer = fencer.strip()
-            this_event['fencers'].append(fencer)
-            if is_checked_in:
-                this_event['fencers_checked_in'].append(fencer)
-            else:
-                this_event['new_fencers_not_checked_in'].append(fencer)
-            for e in events:
-                if e['name'] == event_details:
-                    continue
-                if fencer in e['fencers']:
-                    if e['name'] in this_event['previously_fenced']:
-                        this_event['previously_fenced'][e['name']] += 1
-                    else:
-                        this_event['previously_fenced'][e['name']] = 1
-                    this_event['previous_total'] += 1
-                    try:
-                        this_event['new_fencers_not_checked_in'].remove(fencer)
-                    except ValueError:
-                        pass # already removed; ignore
-                    break
-        events.append(this_event)
-    return (tournament_name, tournament_details, events)
diff --git a/scraper.py b/scraper.py
new file mode 100644
index 0000000000000000000000000000000000000000..a564b0ce4c1086436db1208e6cc29162a22430a5
--- /dev/null
+++ b/scraper.py
@@ -0,0 +1,111 @@
+from lxml import html
+import requests
+from urllib.parse import urlparse, urljoin
+from models import Event, Fencer, Tournament
+
+
+class Scraper:
+    """Scrape a tournament's live results page into model objects."""
+
+    def __init__(self, tournament_url):
+        self.tournament_url = tournament_url
+
+    def scrape(self):
+        """Fetch the tournament page and every event page it links to.
+
+        Returns the populated Tournament. Raises ScrapeError when the
+        tournament name is missing or an event page cannot be read.
+        """
+
+        # Get tournament info
+        try:
+            results = requests.get(self.tournament_url)
+        except requests.exceptions.MissingSchema:
+            results = requests.get("http://{}".format(self.tournament_url))
+        results_tree = html.fromstring(results.content)
+        try:
+            tournament_name = results_tree.xpath(
+                '//span[@class="tournName"]/text()')[0]
+        except IndexError:
+            raise ScrapeError("Tournament info not found.")
+
+        self.tournament = Tournament(tournament_name, results.url)
+
+        # Get tournament events. xpath() returns an empty list when
+        # nothing matches (it does not raise IndexError), so a page
+        # without a schedule yields a tournament with no events.
+        event_urls = results_tree.xpath(
+            '//div[@id="schedule"]/table/tr/td/a[text()="View"]/@href')
+        self.scrape_events(event_urls)
+
+        return self.tournament
+
+    def scrape_events(self, event_urls):
+        """Scrape each event URL and add the event to the tournament."""
+        for event_url in event_urls:
+
+            # Build full event URL (scraped URLs are relative)
+            # TODO: Is there a cleaner (less "DIY") way to do this?
+            if not urlparse(event_url).netloc:
+                event_url = urljoin(self.tournament.url, event_url)
+
+            event = self.scrape_event(event_url)
+            self.tournament.add_event(event)
+
+        # Count once, after every event has been added
+        self.tournament.count_all_fencers()
+
+    def scrape_event(self, event_url):
+        """Fetch one event page and return it as an Event.
+
+        Raises ScrapeError when the name/time details are missing or
+        the page shows none of the known status links.
+        """
+        # Request event page
+        event = requests.get(event_url)
+
+        # Get the event details (name, time) as text
+        event_tree = html.fromstring(event.content)
+        event_details = event_tree.xpath(
+            '//span[@class="tournDetails"]/text()')
+        try:
+            event_name = event_details[0]
+            event_time = event_details[1]
+        except IndexError:
+            raise ScrapeError(
+                "Failed to interpret live results for event \"{}\"."
+                .format(event_url))
+
+        # Get the event status
+        if event_tree.xpath('//a[text()="Final Results"]'):
+            fencers = event_tree.xpath(
+                '//div[@id="finalResults"]/table/tr/td[2]/text()')
+            fencers = [Fencer(f, True) for f in fencers]
+            event_status = Event.STATUS_FINISHED
+        elif event_tree.xpath('//a[text()="Seeding"]'):
+            fencers = event_tree.xpath(
+                '//div[@id="Round1Seeding"]/table/tr/td[2]/text()')
+            fencers = [Fencer(f, True) for f in fencers]
+            event_status = Event.STATUS_STARTED
+        elif event_tree.xpath('//a[text()="Check-In Status"]'):
+            event_status = Event.STATUS_REGISTRATION
+            # A first cell with child elements means checked in
+            fencers_checked_in = [
+                bool(len(f))
+                for f in event_tree.xpath(
+                    '//div[@id="checkIn"]/table/tr/td[1]')]
+            fencers = event_tree.xpath(
+                '//div[@id="checkIn"]/table/tr/td[2]/text()')
+            fencers = [Fencer(f, ci)
+                       for (f, ci) in zip(fencers, fencers_checked_in)]
+        else:
+            # Without this, fencers/event_status would be unbound below
+            raise ScrapeError(
+                "Unrecognized status for event \"{}\"."
+                .format(event_url))
+
+        return Event(event_name, event_time, event_status, fencers)
+
+
+class ScrapeError(Exception):
+    """Raised when a results page cannot be interpreted."""
diff --git a/static/css/style.css b/static/css/style.css
index 15a876fe1c999b7cd4037fbdb8d3e109d92fe29c..0e3c75cec9b544b9f84593f4cf321c41e55f177a 100644
--- a/static/css/style.css
+++ b/static/css/style.css
@@ -55,21 +55,22 @@ a:hover {
   float: right;
   color: #000;
   border-radius: .25em;
-  border: 2px solid #fff;
-  padding: .1em;
+  border: 1.5px solid #fff;
+  padding: .1em .2em;
   font-size: .8em;
   font-weight: bold;
   margin-bottom: .1em;
   vertical-align: top;
+  text-transform: uppercase;
 }
-.status-open .status {
-  background-color: #0f0;
+.status-registration .status {
+  background-color: #2f2;
 }
-.status-ongoing .status {
-  background-color: #ff0;
+.status-started .status {
+  background-color: #fc0;
 }
-.status-closed .status {
-  background-color: #f00;
+.status-finished .status {
+  background-color: #f22;
 }
 
 /* Numbers */
diff --git a/templates/base.html b/templates/base.html
index 6ed2e218e352008f54a318666b50db10a6da8098..6b7a7a8497e12193333ac2ba332a9b1b40d8997b 100644
--- a/templates/base.html
+++ b/templates/base.html
@@ -4,17 +4,17 @@
     <meta charset="utf-8">
     <title>Armory Dashboard</title>
     <meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=1, user-scalable=0">
-    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}?t=20180325">
-    <link rel="apple-touch-icon" sizes="180x180" href="{{ url_for('static', filename='images/icons/apple-touch-icon.png') }}?t=20180325">
-    <link rel="icon" type="image/png" sizes="32x32" href="{{ url_for('static', filename='images/icons/favicon-32x32.png') }}?t=20180325">
-    <link rel="icon" type="image/png" sizes="16x16" href="{{ url_for('static', filename='images/icons/favicon-16x16.png') }}?t=20180325">
-    <link rel="manifest" href="{{ url_for('static', filename='images/icons/site.webmanifest') }}?t=20180325">
-    <link rel="mask-icon" href="{{ url_for('static', filename='images/icons/safari-pinned-tab.svg') }}?t=20180325" color="#5bbad5">
-    <link rel="shortcut icon" href="{{ url_for('static', filename='images/icons/favicon.ico') }}?t=20180325">
+    <link rel="stylesheet" href="{{ url_for('static', filename='css/style.css') }}?t=20180415">
+    <link rel="apple-touch-icon" sizes="180x180" href="{{ url_for('static', filename='images/icons/apple-touch-icon.png') }}?t=20180415">
+    <link rel="icon" type="image/png" sizes="32x32" href="{{ url_for('static', filename='images/icons/favicon-32x32.png') }}?t=20180415">
+    <link rel="icon" type="image/png" sizes="16x16" href="{{ url_for('static', filename='images/icons/favicon-16x16.png') }}?t=20180415">
+    <link rel="manifest" href="{{ url_for('static', filename='images/icons/site.webmanifest') }}?t=20180415">
+    <link rel="mask-icon" href="{{ url_for('static', filename='images/icons/safari-pinned-tab.svg') }}?t=20180415" color="#5bbad5">
+    <link rel="shortcut icon" href="{{ url_for('static', filename='images/icons/favicon.ico') }}?t=20180415">
     <meta name="apple-mobile-web-app-title" content="Armory">
     <meta name="application-name" content="Armory">
     <meta name="msapplication-TileColor" content="#da532c">
-    <meta name="msapplication-config" content="{{ url_for('static', filename='images/icons/browserconfig.xml') }}?t=20180325">
+    <meta name="msapplication-config" content="{{ url_for('static', filename='images/icons/browserconfig.xml') }}?t=20180415">
     <meta name="theme-color" content="#ffffff">
     {% block extra_head %}{% endblock extra_head %}
   </head>
diff --git a/templates/live.html b/templates/live.html
index e04a1d06c45c499ac743ce8b92cf0db421d9716f..ae427f391590f3370b19960066cd258dcddc8748 100644
--- a/templates/live.html
+++ b/templates/live.html
@@ -9,9 +9,9 @@
   </header>
   <main>
   {% for e in events %}
-    <section class="status-{{ e['status'] }}">
+    <section class="status-{{ e['status']|lower }}">
       <header>
-        <div class="status">{{ e['status'] }}</div>
+        <div class="status">{{ e['status']|lower }}</div>
         <div class="name">
           <a name="{{ e['name'] }}" href="#{{ e['name'] }}">{{ e['name'] }}</a>
         </div>