Skip to content
Snippets Groups Projects
Commit b8723db9 authored by Anton Sarukhanov
Browse files

Exclude redundant and likely-wrong formats.

parent 51269377
No related tags found
No related merge requests found
"""Function(s) for getting a format string from a date string.""" """Function(s) for getting a format string from a date string."""
import re import re
from copy import copy
from itertools import product from itertools import product
from collections import defaultdict, Counter from collections import defaultdict, Counter
from typing import Dict, Iterator, List from typing import Dict, Iterator, List
...@@ -15,9 +16,11 @@ FORMAT_DIRECTIVES = [ ...@@ -15,9 +16,11 @@ FORMAT_DIRECTIVES = [
DATE_TOKEN_REGEX = re.compile(r'([\w]+)|([\W]+)') DATE_TOKEN_REGEX = re.compile(r'([\w]+)|([\W]+)')
# pylint: disable=too-many-locals # pylint: disable=too-many-locals,too-many-branches
def get_all_format_strings(date_string, allow_reuse=False, def _get_all_format_strings(date_string, allow_reuse=False,
no_zero=False) -> Iterator[str]: include_day_of_year=False,
include_week_number=False,
no_zero=False) -> Iterator[str]:
"""Yield date format strings which match date_string.""" """Yield date format strings which match date_string."""
try: try:
parsed_datetime = parse(date_string) parsed_datetime = parse(date_string)
...@@ -26,7 +29,14 @@ def get_all_format_strings(date_string, allow_reuse=False, ...@@ -26,7 +29,14 @@ def get_all_format_strings(date_string, allow_reuse=False,
return return
tokens = DATE_TOKEN_REGEX.findall(date_string) tokens = DATE_TOKEN_REGEX.findall(date_string)
possible_matches = defaultdict(list) # type: Dict[str, List[str]] possible_matches = defaultdict(list) # type: Dict[str, List[str]]
for test_format in FORMAT_DIRECTIVES: format_directives = copy(FORMAT_DIRECTIVES)
if not include_day_of_year:
format_directives.remove('%-j')
format_directives.remove('%j')
if not include_week_number:
for directive in ('%W', '%w', '%U'):
format_directives.remove(directive)
for test_format in format_directives:
test_string = parsed_datetime.strftime(test_format) test_string = parsed_datetime.strftime(test_format)
possible_matches[test_string.lower()].append(test_format) possible_matches[test_string.lower()].append(test_format)
possible_parts = [] possible_parts = []
...@@ -46,7 +56,7 @@ def get_all_format_strings(date_string, allow_reuse=False, ...@@ -46,7 +56,7 @@ def get_all_format_strings(date_string, allow_reuse=False,
if not allow_reuse: if not allow_reuse:
counts = Counter(sequence) counts = Counter(sequence)
max_count = max( max_count = max(
counts[diretive] for diretive in FORMAT_DIRECTIVES) counts[diretive] for diretive in format_directives)
if max_count > 1: if max_count > 1:
continue continue
format_string = "".join(sequence) format_string = "".join(sequence)
...@@ -77,4 +87,4 @@ def _remove_extra_nlz(combinations) -> Iterator[List[str]]: ...@@ -77,4 +87,4 @@ def _remove_extra_nlz(combinations) -> Iterator[List[str]]:
def get_unique_format_strings(date_string, allow_reuse=False) -> List[str]: def get_unique_format_strings(date_string, allow_reuse=False) -> List[str]:
"""Return date format strings matching date_string, without duplicates.""" """Return date format strings matching date_string, without duplicates."""
return list(set(get_all_format_strings(date_string, allow_reuse))) return list(set(_get_all_format_strings(date_string, allow_reuse)))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment