diff --git a/src/languedoc/predict.py b/src/languedoc/predict.py --- a/src/languedoc/predict.py +++ b/src/languedoc/predict.py @@ -3,6 +3,7 @@ import re import itertools import json import gzip +from typing import Union TOP_NGRAM_COUNT = 3000 MODEL_PATH = os.path.join(os.path.dirname(__file__), "models.json.gz") @@ -46,7 +47,7 @@ def extract_ngram_counts(text: str) -> d return counts -def rank_ngram_counts(counts: dict[str, int]) -> dict[str, int]: +def rank_ngram_counts(counts: dict[str, Union[int, float]]) -> dict[str, int]: """Order supplied ngrams by their counts (then length, then alphabetically) and return their ranking. :param counts: a dict mapping ngrams to their counts