From 5d5d3a619262113ca802dfe61f5ac24c3c8b5675 Mon Sep 17 00:00:00 2001 From: Anton Sarukhanov <code@ant.sr> Date: Thu, 13 Oct 2022 00:44:33 +0000 Subject: [PATCH] Update for prod --- app.py | 2 ++ scraper.py | 30 +++++++++++++++++++----------- 2 files changed, 21 insertions(+), 11 deletions(-) diff --git a/app.py b/app.py index bbea236..d476316 100644 --- a/app.py +++ b/app.py @@ -14,6 +14,8 @@ DISPLAY_DATE_FORMAT = '%A, %B %-d, %Y' DISPLAY_TIME_FORMAT = '%-I:%M %p on %A' +app.config['APPLICATION_ROOT'] = '/armory' + def _make_cache_key(): """Create a cache key for Flask-Caching.""" path = request.path diff --git a/scraper.py b/scraper.py index 37f9e59..1291477 100644 --- a/scraper.py +++ b/scraper.py @@ -5,6 +5,7 @@ from concurrent.futures import ThreadPoolExecutor from datetime import date, datetime, timedelta from urllib.parse import urlparse, urljoin, urlencode from lxml import html # nosec ; Bandit suggests defusedxml but defusedxml.lxml is dead +from json.decoder import JSONDecodeError import requests from models import Event, EventPhase, Fencer, Tournament @@ -125,27 +126,34 @@ class FTLiveScraper(Scraper): """ BASE_URL = 'https://fencingtimelive.com' - TOURNAMENTS_URL = urljoin(BASE_URL, 'tournaments/list/data?{query}') + TOURNAMENTS_URL = urljoin(BASE_URL, 'tournaments/search/data?{query}') TOURNAMENT_URL = urljoin(BASE_URL, 'tournaments/eventSchedule/{tournament_id}') FENCERS_URL = urljoin(BASE_URL, 'events/competitors/data/{event_id}') EVENT_URL = urljoin(BASE_URL, 'events/view/{event_id}') START_FORMAT = '%Y-%m-%dT%H:%M:%S.000Z' EVENT_DATETIME_FORMAT = '%A %B %d, %Y %I:%M %p' - MAX_AGO = timedelta(days=7) - MAX_AHEAD = timedelta(days=7) + DATE_MODE = -2 # Last 30 days + SEARCH_FROM = timedelta(days=21) # Start search x days into future - def list_tournaments(self, search=None, from_date=None, to_date=None): + def list_tournaments(self, search=None, date_mode=None, search_date=None): """Get a list of tournaments in FTLive.""" - if not search and not from_date and not to_date: - from_date = date.today() - self.MAX_AGO - to_date = date.today() + self.MAX_AHEAD + if not search_date: + search_date = date.today() + self.SEARCH_FROM + if not date_mode: + date_mode = self.DATE_MODE args = { - 'tname': search or '', - 'from': from_date or '', - 'to': to_date or '' + 'search': search or '', + 'today': search_date, + 'date': date_mode, + 'filter': 'Country', + 'country': 'USA', } url = self.TOURNAMENTS_URL.format(query=urlencode(args)) - tournaments = requests.get(url).json() + try: + tournaments = requests.get(url).json() + except JSONDecodeError: + raise ScrapeError("Failed to decode tournament list from URL {url}" + .format(url=url)) return [{'start': datetime.strptime(t['start'], self.START_FORMAT), 'id': t['id'], 'name': t['name'], -- GitLab