diff --git a/tests/test_predict.py b/tests/test_predict.py --- a/tests/test_predict.py +++ b/tests/test_predict.py @@ -1,6 +1,7 @@ from unittest import TestCase -from languedoc.predict import preprocess, rank_ngram_counts, Sample, identify +from languedoc.predict import preprocess, extract_kgram_counts, extract_ngram_counts, rank_ngram_counts, \ + extract_ranked_ngrams, Sample, identify class TestPredict(TestCase): @@ -10,11 +11,36 @@ class TestPredict(TestCase): self.assertEqual(preprocess("1% "), " ") self.assertEqual(preprocess("Глава ĚŠČŘŽ"), " глава ěščřž ") + def test_extract_kgram_counts(self): + text = "abbbabb" + self.assertEqual(extract_kgram_counts(text, 1), {"a": 2, "b": 5}) + self.assertEqual(extract_kgram_counts(text, 2), {"ab": 2, "bb": 3, "ba": 1}) + + def test_extract_ngram_counts(self): + text = "aab" + self.assertEqual(extract_ngram_counts(text), {"a": 2, "b": 1, "aa": 1, "ab": 1, "aab": 1}) + + text = "abbbabb" + self.assertEqual( + extract_ngram_counts(text), + {"a": 2, "b": 5, "ab": 2, "bb": 3, "ba": 1, "abb": 2, "bbb": 1, "bba": 1, "bab": 1} + ) + def test_rank_ngram_counts(self): freqs = {"a": 3, "aa": 1, "b": 4, "bb": 1, "c": 1} expected = {"b": 0, "a": 1, "c": 2, "aa": 3, "bb": 4} self.assertEqual(rank_ngram_counts(freqs), expected) + def test_extract_ranked_ngrams(self): + text = "aab" + self.assertEqual(extract_ranked_ngrams(text), {"a": 0, "b": 1, "aa": 2, "ab": 3, "aab": 4}) + + text = "abbbabb" + self.assertEqual( + extract_ranked_ngrams(text), + {"b": 0, "bb": 1, "a": 2, "ab": 3, "abb": 4, "ba": 5, "bab": 6, "bba": 7, "bbb": 8} + ) + class TestSample(TestCase): def test_extract(self):