diff --git a/src/languedoc/predict.py b/src/languedoc/predict.py
--- a/src/languedoc/predict.py
+++ b/src/languedoc/predict.py
@@ -14,7 +14,12 @@ def preprocess(text: str) -> str:
 	return text.lower()
 
 
-def extract_kgram_counts(text, k):
+def extract_kgram_counts(text: str, k: int) -> dict[str, int]:
+	"""Extract k-gram counts from the text for a provided k.
+
+	:param text: the source text
+	:param k: length of the k-grams to extract: 1 for letters, 2 for bigrams, ...
+	:return: a dict mapping k-grams to their counts in the text"""
 	n = len(text)
 	counts = dict()
 
@@ -28,7 +33,11 @@ def extract_kgram_counts(text, k):
 	return counts
 
 
-def extract_ngram_counts(text):
+def extract_ngram_counts(text: str) -> dict[str, int]:
+	"""Extract counts of 1- to 3-grams from the text.
+
+	:param text: the source text
+	:return: a dict mapping ngrams to their counts in the text"""
 	counts = dict()
 
 	for k in range(1, 4):
@@ -37,12 +46,20 @@ def extract_ngram_counts(text):
 	return counts
 
 
-def rank_ngram_counts(counts):
+def rank_ngram_counts(counts: dict[str, int]) -> dict[str, int]:
+	"""Order supplied ngrams by descending count (ties: shorter first, then alphabetically), keep the top TOP_NGRAM_COUNT and return their ranking.
+
+	:param counts: a dict mapping ngrams to their counts
+	:return: a dict mapping at most TOP_NGRAM_COUNT ngrams to their rank (the most frequent: 0, the second: 1, ...)"""
 	ordered_ngrams = sorted(counts.items(), key=lambda kv: (-kv[1], len(kv[0]), kv[0]))[:TOP_NGRAM_COUNT]
-	return dict(zip([key for (key, freq) in ordered_ngrams], itertools.count(0)))
+	return dict(zip([key for (key, count) in ordered_ngrams], itertools.count(0)))
 
 
-def extract_ranked_ngrams(text):
+def extract_ranked_ngrams(text: str) -> dict[str, int]:
+	"""Extract ngrams from the text and rank them from the most common.
+
+	:param text: the source text
+	:return: a dict mapping ngrams to their ranks, e.g. {most_common_ngram: 0, second: 1, ...}"""
 	counts = extract_ngram_counts(text)
 	return rank_ngram_counts(counts)