Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Armory Dashboard
Manage
Activity
Members
Labels
Plan
Issues
1
Issue boards
Milestones
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container Registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Anton Sarukhanov
Armory Dashboard
Commits
c1541bda
Commit
c1541bda
authored
5 years ago
by
Anton Sarukhanov
Browse files
Options
Downloads
Patches
Plain Diff
Add support for Fencing Time Live.
parent
ed4197e0
No related branches found
No related tags found
No related merge requests found
Pipeline
#105
passed with stage
Stage:
in 43 seconds
Changes
5
Pipelines
1
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
app.py
+23
-16
23 additions, 16 deletions
app.py
models.py
+6
-3
6 additions, 3 deletions
models.py
scraper.py
+127
-14
127 additions, 14 deletions
scraper.py
templates/index.html
+29
-35
29 additions, 35 deletions
templates/index.html
templates/live.html
+2
-2
2 additions, 2 deletions
templates/live.html
with
187 additions
and
70 deletions
app.py
+
23
−
16
View file @
c1541bda
...
...
@@ -2,10 +2,10 @@
from
flask
import
Flask
,
render_template
,
redirect
,
request
,
url_for
from
flask_caching
import
Cache
from
scraper
import
FTPScraper
from
scraper
import
FTPScraper
,
FTLiveScraper
import
models
# pylint: disable=invalid-name
# pylint: disable=invalid-name
; These module-level variables are standard for Flask.
app
=
Flask
(
__name__
)
cache
=
Cache
(
app
,
config
=
{
'
CACHE_TYPE
'
:
'
simple
'
})
...
...
@@ -14,6 +14,13 @@ DISPLAY_DATE_FORMAT = '%A, %B %-d, %Y'
DISPLAY_TIME_FORMAT
=
'
%-I:%M %p on %A
'
def _make_cache_key():
    """Build the Flask-Caching key for the request currently being served.

    The key is the request path immediately followed by the hash of the
    (unordered) set of query-string items, encoded as UTF-8 bytes, so the
    same path requested with different query arguments is cached separately.
    """
    route = request.path
    query_fingerprint = hash(frozenset(request.args.items()))
    return '{}{}'.format(route, query_fingerprint).encode('utf-8')
@app.after_request
def
add_header
(
response
):
"""
Add an HTTP response header for cache invalidation.
"""
...
...
@@ -23,36 +30,36 @@ def add_header(response):
@app.template_filter
(
'
strftime
'
)
def
_jinja2_filter_datetime
(
datetime
,
date
=
True
,
time
=
True
):
"""
Format a DateTime for display to a user.
"""
return
datetime
.
strftime
(
DISPLAY_DATETIME_FORMAT
if
date
and
time
else
DISPLAY_DATE_FORMAT
if
date
else
DISPLAY_TIME_FORMAT
if
time
else
''
)
@app.route
(
"
/
"
)
@cache.cached
(
timeout
=
300
)
def
index
():
"""
Render the app landing page.
"""
return
render_template
(
'
index.html
'
)
def
_make_cache_key
():
"""
Create a cache key for Flask-Caching.
"""
path
=
request
.
path
args
=
str
(
hash
(
frozenset
(
request
.
args
.
items
())))
return
(
path
+
args
).
encode
(
'
utf-8
'
)
ftl_scraper
=
FTLiveScraper
()
return
render_template
(
'
index.html
'
,
tournaments
=
ftl_scraper
.
list_tournaments
())
@app.route
(
"
/live
"
)
@cache.cached
(
timeout
=
300
,
key_prefix
=
_make_cache_key
)
def
live
(
results_url
=
None
):
def
live
():
"""
Render the primary view of live tournament stats.
"""
results_url
=
request
.
args
.
get
(
'
results_url
'
)
if
not
results_url
:
ftl_id
=
request
.
args
.
get
(
'
ftl_id
'
)
if
ftl_id
:
tournament
=
FTLiveScraper
().
scrape_tournament
(
tournament_id
=
ftl_id
)
elif
results_url
:
tournament
=
FTPScraper
(
results_url
).
scrape_tournament
()
else
:
return
redirect
(
url_for
(
'
index
'
))
scraper
=
FTPScraper
(
results_url
)
tournament
=
scraper
.
scrape
()
return
render_template
(
'
live.html
'
,
tournament
=
tournament
,
events
=
tournament
.
events
,
phases
=
models
.
EventPhase
)
return
render_template
(
'
live.html
'
,
tournament
=
tournament
,
phases
=
models
.
EventPhase
)
if
__name__
==
"
__main__
"
:
...
...
This diff is collapsed.
Click to expand it.
models.py
+
6
−
3
View file @
c1541bda
...
...
@@ -6,7 +6,7 @@ from datetime import datetime
from
typing
import
List
from
enum
import
Enum
Fencer
=
namedtuple
(
'
Fencer
'
,
'
name is_checked_in
'
)
Fencer
=
namedtuple
(
'
Fencer
'
,
'
name is_checked_in
ftl_id
'
,
defaults
=
[
None
]
)
setattr
(
Fencer
,
'
__eq__
'
,
lambda
f1
,
f2
:
f1
.
name
==
f2
.
name
)
...
...
@@ -16,7 +16,9 @@ class Tournament:
name
:
str
url
:
str
updated
:
datetime
ftl_id
:
str
=
None
# type: ignore
updated
:
datetime
=
None
# type: ignore
location
:
str
=
None
# type: ignore
events
:
List
[
'
Event
'
]
=
field
(
default_factory
=
list
)
def
count_fencers
(
self
):
...
...
@@ -47,7 +49,7 @@ class EventStatistics:
@dataclass
class
Event
:
class
Event
:
# pylint: disable=too-many-instance-attributes
"""
A single event in a tournament (e.g. Y12 Mens Foil).
"""
name
:
str
...
...
@@ -55,6 +57,7 @@ class Event:
time
:
datetime
tournament
:
Tournament
phase
:
EventPhase
ftl_id
:
str
=
None
# type: ignore
stats
:
EventStatistics
=
field
(
default_factory
=
EventStatistics
)
fencers
:
List
[
Fencer
]
=
field
(
default_factory
=
list
)
...
...
This diff is collapsed.
Click to expand it.
scraper.py
+
127
−
14
View file @
c1541bda
...
...
@@ -2,14 +2,24 @@
import
asyncio
from
concurrent.futures
import
ThreadPoolExecutor
from
datetime
import
date
time
from
urllib.parse
import
urlparse
,
urljoin
from
lxml
import
html
# nosec Bandit suggests defusedxml but defusedxml.lxml is dead
from
datetime
import
date
,
datetime
,
timedelta
from
urllib.parse
import
urlparse
,
urljoin
,
urlencode
from
lxml
import
html
# nosec
;
Bandit suggests defusedxml but defusedxml.lxml is dead
import
requests
from
models
import
Event
,
EventPhase
,
Fencer
,
Tournament
# pylint: disable=too-few-public-methods ; I'm ok with that.
class
FTPScraper
:
class Scraper:
    """Common base class shared by every tournament scraper."""

    def __init__(self):
        """Start out with no scraped tournament attached to the instance."""
        # Subclasses replace this with the tournament object they build.
        self.tournament = None
class
FTPScraper
(
Scraper
):
"""
Scraper for tournaments hosted on an FTP server.
This reads the original Fencing Time results pages, hosted by individual
...
...
@@ -22,18 +32,20 @@ class FTPScraper:
def
__init__
(
self
,
tournament_url
):
"""
Set up the scraper instance.
"""
self
.
tournament_url
=
tournament_url
s
elf
.
tournament
=
None
s
uper
(
FTPScraper
,
self
).
__init__
()
def
scrape
(
self
):
def
scrape
_tournament
(
self
):
"""
Get all tournament information.
"""
try
:
results
=
requests
.
get
(
self
.
tournament_url
)
except
requests
.
exceptions
.
MissingSchema
:
results
=
requests
.
get
(
"
http://{}
"
.
format
(
self
.
tournament_url
))
results_
tree
=
html
.
fromstring
(
results
.
content
)
tournament_e
tree
=
html
.
fromstring
(
results
.
content
)
try
:
tournament_name
=
results_tree
.
xpath
(
'
//span[@class=
"
tournName
"
]/text()
'
)[
0
]
updated_str
=
(
results_tree
.
xpath
(
'
//span[@class=
"
lastUpdate
"
]/text()
'
)[
0
]
tournament_name
=
tournament_etree
.
xpath
(
'
//span[@class=
"
tournName
"
]/text()
'
)[
0
]
updated_str
=
(
tournament_etree
.
xpath
(
'
//span[@class=
"
lastUpdate
"
]/text()
'
)[
0
]
.
replace
(
'
Last Updated:
'
,
''
).
strip
())
updated
=
datetime
.
strptime
(
updated_str
,
self
.
UPDATED_DATETIME_FORMAT
)
except
IndexError
:
...
...
@@ -44,7 +56,7 @@ class FTPScraper:
# Get tournament events
try
:
event_urls
=
results_
tree
.
xpath
(
event_urls
=
tournament_e
tree
.
xpath
(
'
//div[@id=
"
schedule
"
]/table/tr/td/a[text()=
"
View
"
]/@href
'
)
except
IndexError
:
raise
ScrapeError
(
"
No event schedule found.
"
)
...
...
@@ -66,12 +78,12 @@ class FTPScraper:
futures
.
append
(
loop
.
run_in_executor
(
executor
,
requests
.
get
,
event_url
))
for
response
in
await
asyncio
.
gather
(
*
futures
):
event
=
self
.
parse_event
(
response
)
event
=
self
.
_
parse_event
(
response
)
self
.
tournament
.
events
.
append
(
event
)
self
.
tournament
.
count_fencers
()
def
parse_event
(
self
,
event
):
def
_
parse_event
(
self
,
event
):
"""
Extract useful strings from the event info.
"""
event_tree
=
html
.
fromstring
(
event
.
content
)
event_details
=
event_tree
.
xpath
(
'
//span[@class=
"
tournDetails
"
]/text()
'
)
...
...
@@ -106,12 +118,113 @@ class FTPScraper:
url
=
event
.
url
,
fencers
=
fencers
,
tournament
=
self
.
tournament
)
class
FTLiveScraper
(
FTP
Scraper
):
class FTLiveScraper(Scraper):
    """Scraper for tournaments hosted on fencingtimelive.com.

    This reads the newer-style pages, centrally hosted by Fencing Time.
    """
    BASE_URL = 'https://fencingtimelive.com'
    TOURNAMENTS_URL = urljoin(BASE_URL, 'tournaments/list/data?{query}')
    TOURNAMENT_URL = urljoin(BASE_URL, 'tournaments/eventSchedule/{tournament_id}')
    FENCERS_URL = urljoin(BASE_URL, 'events/competitors/data/{event_id}')
    EVENT_URL = urljoin(BASE_URL, 'events/view/{event_id}')
    # Format of the 'start' field in the tournament list JSON.
    START_FORMAT = '%Y-%m-%dT%H:%M:%S.000Z'
    # Format of "<date heading> <time cell>" on the event schedule page.
    EVENT_DATETIME_FORMAT = '%A %B %d, %Y %I:%M %p'
    # Default search window (around today) used by list_tournaments().
    MAX_AGO = timedelta(days=7)
    MAX_AHEAD = timedelta(days=7)
    # Seconds to wait on any single HTTP request; without a timeout,
    # requests can block the serving thread indefinitely.
    REQUEST_TIMEOUT = 30

    def list_tournaments(self, search=None, from_date=None, to_date=None):
        """Get a list of tournaments in FTLive.

        When no filter at all is given, the search is limited to the window
        from MAX_AGO before today to MAX_AHEAD after today.

        Returns a list of dicts with the keys 'start' (a datetime parsed
        with START_FORMAT), 'id', 'name' and 'location'.
        """
        if not search and not from_date and not to_date:
            from_date = date.today() - self.MAX_AGO
            to_date = date.today() + self.MAX_AHEAD
        args = {'tname': search or '',
                'from': from_date or '',
                'to': to_date or ''}
        url = self.TOURNAMENTS_URL.format(query=urlencode(args))
        tournaments = requests.get(url, timeout=self.REQUEST_TIMEOUT).json()
        return [{'start': datetime.strptime(t['start'], self.START_FORMAT),
                 'id': t['id'],
                 'name': t['name'],
                 'location': t['location']}
                for t in tournaments]

    def scrape_tournament(self, tournament_id):
        """Get all tournament information.

        Fetches the event schedule page for ``tournament_id``, parses every
        event row, then fills in each event's fencers. The resulting
        Tournament is stored on ``self.tournament`` and returned.

        Raises ScrapeError if the tournament name or an event row cannot be
        interpreted.
        """
        tournament_url = self.TOURNAMENT_URL.format(tournament_id=tournament_id)
        tournament_html = requests.get(
            tournament_url, timeout=self.REQUEST_TIMEOUT).content
        tournament_etree = html.fromstring(tournament_html)
        try:
            tournament_name = tournament_etree.xpath(
                '//div[@class="desktop tournName"]/text()')[0]
        except IndexError as exc:
            raise ScrapeError("Tournament info not found.") from exc
        self.tournament = Tournament(
            name=tournament_name, url=tournament_url, ftl_id=tournament_id)
        # Event rows are the <tr> elements whose id matches "ev_.*".
        event_data = tournament_etree.xpath(
            "//tr[re:test(@id, 'ev_.*')]",
            namespaces={"re": "http://exslt.org/regular-expressions"})
        for event in event_data:
            self.tournament.events.append(self._parse_event(event))
        loop = asyncio.new_event_loop()
        try:
            loop.run_until_complete(self._get_fencers())
        finally:
            # A fresh loop is created per call; close it so its resources
            # are released instead of accumulating across requests.
            loop.close()
        return self.tournament

    async def _get_fencers(self):
        """Get event information asynchronously.

        Downloads the competitor list of every event in ``self.tournament``
        on a thread pool, stores the fencers on each event and finally
        refreshes the tournament's fencer counts.
        """
        def get_fencers(event):
            fencers_url = self.FENCERS_URL.format(event_id=event.ftl_id)
            response = requests.get(
                fencers_url, timeout=self.REQUEST_TIMEOUT).json()
            # Scratched competitors are left out entirely.
            event.fencers = [
                Fencer(name=f['name'],
                       ftl_id=f['id'],
                       is_checked_in=(f['status'] == 'CheckedIn'))
                for f in response if f['status'] != 'Scratched']
            return event

        with ThreadPoolExecutor(max_workers=20) as executor:
            # Inside a coroutine the running loop is the one to schedule on.
            loop = asyncio.get_running_loop()
            futures = [loop.run_in_executor(executor, get_fencers, event)
                       for event in self.tournament.events]
            await asyncio.gather(*futures)
        self.tournament.count_fencers()

    def _parse_event(self, event_etree):
        """Extract useful strings from the event info.

        ``event_etree`` is one event row of the schedule table. Its first
        cell holds the start time, its second cell the event name, and its
        ``data-href`` attribute ends with the event id. The date comes from
        the closest <h5> preceding the row's table.

        Raises ScrapeError if any of these pieces is missing or malformed.
        """
        # Bound before the try block so the error message below can always
        # be formatted, even when the name itself could not be read.
        name = '<unknown>'
        try:
            cells = list(event_etree)
            name = cells[1].text_content().strip()
            ftl_id = event_etree.attrib['data-href'].split('/')[-1]
            url = self.EVENT_URL.format(event_id=ftl_id)
            time_str = cells[0].text_content().strip()
            table = next(event_etree.iterancestors('table'))
            date_str = next(
                table.itersiblings('h5', preceding=True)).text_content()
            dt_str = '{} {}'.format(date_str, time_str)
            event_time = datetime.strptime(dt_str, self.EVENT_DATETIME_FORMAT)
            return Event(name=name, url=url, time=event_time,
                         tournament=self.tournament,
                         phase=self._get_event_phase(event_etree),
                         ftl_id=ftl_id)
        except (IndexError, KeyError, StopIteration, ValueError,
                ScrapeError) as exc:
            # Missing cell, missing data-href, missing table/date heading
            # and unparsable date all mean the row could not be interpreted.
            raise ScrapeError(
                "Failed to interpret live results for event \"{}\". {}"
                .format(name, exc)) from exc

    def _get_event_phase(self, event_etree):  # pylint: disable=no-self-use
        """Determine the state of the event.

        Looks at the text of the row's third cell: "Finished" maps to
        EventPhase.FINISHED, "Fencing" to EventPhase.STARTED, and anything
        else to EventPhase.REGISTRATION.
        """
        if event_etree.xpath('td[3]/text()[contains(., "Finished")]'):
            event_phase = EventPhase.FINISHED
        elif event_etree.xpath('td[3]/text()[contains(., "Fencing")]'):
            event_phase = EventPhase.STARTED
        else:
            event_phase = EventPhase.REGISTRATION
        return event_phase
class
ScrapeError
(
Exception
):
...
...
This diff is collapsed.
Click to expand it.
templates/index.html
+
29
−
35
View file @
c1541bda
...
...
@@ -7,40 +7,34 @@
<p>
Welcome! Please select a Live Results URL, or enter your own.
</p>
<p>
<form
action=
"{{ url_for('live') }}"
method=
"get"
>
<label>
Live Results URL:
<select
name=
"results_url"
onchange=
"this.form.submit()"
>
<option
value=
""
selected
disabled
>
-- Select One --
</option>
<optgroup
label=
"Escrime Management"
>
<option
value=
"http://escrimeresults.com/tournaments/NCAA.html"
>
NCAA
</option>
<option
value=
"http://escrimeresults.com/tournaments/Atlantic-Coast-Conference.html"
>
Atlantic Coast Conference
</option>
<option
value=
"http://escrimeresults.com/tournaments/Ivy-League.html"
>
Ivy League
</option>
<option
value=
"http://escrimeresults.com/cobra/"
>
Cobra
</option>
<option
value=
"http://escrimeresults.com/thrust/"
>
Thrust
</option>
<option
value=
"http://escrimeresults.com/tournaments/NJSIAA.html"
>
NJSIAA
</option>
<option
value=
"http://escrimeresults.com/Candlewood/"
>
Candlewood
</option>
<option
value=
"http://escrimeresults.com/tournaments/U-Penn.html"
>
U Penn
</option>
<option
value=
"http://escrimeresults.com/tournaments/Temple-University.html"
>
Temple U
</option>
<option
value=
"http://escrimeresults.com/Big1/"
>
Big1
</option>
<option
value=
"http://escrimeresults.com/tournaments/US-Collegiate-Squad-Championships.html"
>
US Collegiate Squad
</option>
<option
value=
"http://escrimeresults.com/MKHS/"
>
MKHS
</option>
</optgroup>
<optgroup
label=
"NJ Division"
>
<option
value=
"http://njfencingresults.org/liveresults/"
>
NJFencingResults.org/liveresults
</option>
</optgroup>
</select>
</label>
<input
type=
"submit"
value=
"Go!"
>
</form>
</p>
<p>
<form
action=
"{{ url_for('live') }}"
method=
"get"
>
<label>
Other URL:
<input
name=
"results_url"
placeholder=
"example.com/liveresults"
>
</label>
<input
type=
"submit"
value=
"Go!"
>
</form>
</p>
<section>
<h2>
Fencing Time Live
</h2>
<p>
<form
action=
"{{ url_for('live') }}"
method=
"get"
>
<label>
Choose a Tournament:
<select
name=
"ftl_id"
onchange=
"this.form.submit()"
>
<option
value=
""
selected
disabled
>
-- Select One --
</option>
{% for tournament in tournaments | sort(attribute='start') %}
{% if loop.changed(tournament.start) %}
</optgroup><optgroup
label=
"{{ tournament.start | strftime(time=False) }}"
>
{% endif %}
<option
value=
"{{ tournament['id'] }}"
>
{{ tournament['name'] }} ({{ tournament['location'] }})
</option>
{% if loop.last %}
</optgroup>
{% endif %}
{% endfor %}
</select>
</label>
<input
type=
"submit"
value=
"Go!"
>
</form>
</p>
</section>
<section>
<h2>
FTP Live Results
</h2>
<p>
<form
action=
"{{ url_for('live') }}"
method=
"get"
>
<label>
Enter a custom Live Results link:
<input
name=
"results_url"
placeholder=
"example.com/liveresults"
>
</label>
<input
type=
"submit"
value=
"Go!"
>
</form>
</p>
</section>
</main>
{% endblock content %}
This diff is collapsed.
Click to expand it.
templates/live.html
+
2
−
2
View file @
c1541bda
...
...
@@ -5,12 +5,12 @@
{% block content %}
<header>
<span
class=
"back-to-home"
><a
href=
"{{ url_for('index') }}"
>
Back to Home
</a></span>
<h1>
{{ tournament.name }} - {{ events | length }} events
<h1>
{{ tournament.name }} - {{
tournament.
events | length }} events
<a
href=
"{{ tournament.url }}"
target=
"_blank"
>
<img
class=
"ext-link"
src=
"{{ url_for('static', filename='images/font-awesome/external-link-alt.svg') }}?t=20180415"
></a></h1>
</header>
<main>
{% for e in events | sort(attribute='time') %}
{% for e in
tournament.
events | sort(attribute='time') %}
{% if loop.changed(e.time.date()) %}
<header><h2>
{{ e.time | strftime(time=False) }}
</h2></header>
{% endif %}
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment