Changeset - d76af898e537
[Not reviewed]
default
0 1 0
Laman - 2 years ago 2022-10-29 22:55:27

tests for exporting, loading and comparing
1 file changed with 17 insertions and 0 deletions:
0 comments (0 inline, 0 general)
tests/test_predict.py
Show inline comments
 
@@ -10,63 +10,80 @@ class TestPredict(TestCase):
 
		self.assertEqual(preprocess("A  b.c"), " a b c ")
 
		self.assertEqual(preprocess("1% "), " ")
 
		self.assertEqual(preprocess("Глава ĚŠČŘŽ"), " глава ěščřž ")
 

	
 
	def test_extract_kgram_counts(self):
		# Checks the counts expected for "abbbabb" at k=1 (single characters)
		# and k=2 (adjacent pairs).
		sample = "abbbabb"
		expectations = (
			(1, {"a": 2, "b": 5}),
			(2, {"ab": 2, "bb": 3, "ba": 1}),
		)
		for (k, expected_counts) in expectations:
			self.assertEqual(extract_kgram_counts(sample, k), expected_counts)
 

	
 
	def test_extract_ngram_counts(self):
		# Each case pairs an input string with the counts expected for it.
		# The expected dicts contain substrings of length one to three.
		cases = (
			(
				"aab",
				{"a": 2, "b": 1, "aa": 1, "ab": 1, "aab": 1},
			),
			(
				"abbbabb",
				{"a": 2, "b": 5, "ab": 2, "bb": 3, "ba": 1, "abb": 2, "bbb": 1, "bba": 1, "bab": 1},
			),
		)
		for (sample, expected_counts) in cases:
			self.assertEqual(extract_ngram_counts(sample), expected_counts)
 

	
 
	def test_rank_ngram_counts(self):
		# Input counts; "c", "aa" and "bb" deliberately share the same count.
		counts = {"a": 3, "aa": 1, "b": 4, "bb": 1, "c": 1}
		# Expected ranks: the higher counts come first ("b", then "a"),
		# and the three tied entries are expected in the order "c", "aa", "bb".
		ranking = {"b": 0, "a": 1, "c": 2, "aa": 3, "bb": 4}

		actual = rank_ngram_counts(counts)
		self.assertEqual(actual, ranking)
 

	
 
	def test_extract_ranked_ngrams(self):
		# Each case pairs an input string with the n-gram -> rank mapping
		# expected for it (rank 0 is the top-ranked n-gram).
		cases = (
			(
				"aab",
				{"a": 0, "b": 1, "aa": 2, "ab": 3, "aab": 4},
			),
			(
				"abbbabb",
				{"b": 0, "bb": 1, "a": 2, "ab": 3, "abb": 4, "ba": 5, "bab": 6, "bba": 7, "bbb": 8},
			),
		)
		for (sample, expected_ranks) in cases:
			self.assertEqual(extract_ranked_ngrams(sample), expected_ranks)
 

	
 

	
 
class TestSample(TestCase):
	"""Tests for building, loading, exporting and comparing Sample objects."""

	def test_extract(self):
		# A single run of one letter, with an explicit language label.
		repeated = Sample.extract("aaaaaa", "a")
		self.assertEqual(repeated.language, "a")
		expected_repeated = {'a': 0, 'aa': 1, 'aaa': 2, ' a': 3, 'a ': 4, ' aa': 5, 'aa ': 6}
		self.assertEqual(repeated.ranked_ngrams, expected_repeated)

		# Several short words; the expected n-grams include ones spanning a space.
		spaced = Sample.extract("aa aa aa", "b")
		expected_spaced = {'a': 0, ' a': 1, 'a ': 2, 'aa': 3, ' aa': 4, 'aa ': 5, 'a a': 6}
		self.assertEqual(spaced.ranked_ngrams, expected_spaced)

		# No language argument: the language is expected to be "??".
		unlabeled = Sample.extract("aa")
		self.assertEqual(unlabeled.language, "??")
		expected_unlabeled = {'a': 0, ' a': 1, 'a ': 2, 'aa': 3, ' aa': 4, 'aa ': 5}
		self.assertEqual(unlabeled.ranked_ngrams, expected_unlabeled)

	def test_load(self):
		# The exported form lists n-grams in order; loading is expected
		# to map each one to its position in that list.
		payload = {"language": "en", "ngrams": list("abcdefgh")}
		loaded = Sample.load(payload)
		self.assertEqual(loaded.language, "en")
		# Maps "h" -> 7 down to "a" -> 0.
		self.assertEqual(loaded.ranked_ngrams, dict(zip("hgfedcba", range(7, -1, -1))))

	def test_export(self):
		# The rank dict is inserted from "h" (rank 7) down to "a" (rank 0),
		# while the exported list is expected in ascending rank order.
		ranks = dict(zip("hgfedcba", range(7, -1, -1)))
		sample = Sample("en", ranks)
		expected_export = {"language": "en", "ngrams": list("abcdefgh")}
		self.assertEqual(sample.export(), expected_export)

	def test_compare(self):
		first = Sample("ab", {"a": 0, "b": 1, "ab": 2})
		second = Sample("bc", {"b": 0, "ba": 1, "a": 2, "ab": 3})

		# The expected results differ by direction (4 one way, 7 the other).
		forward = first.compare(second)
		self.assertEqual(forward, 4)
		backward = second.compare(first)
		self.assertEqual(backward, 7)
 

	
 

	
 
class TestIdentify(TestCase):
	"""Checks identify() against one short sentence per language."""

	def test_identify(self):
		# (expected language code, sentence) pairs.
		samples = [
			("cs", "Severní ledový oceán je nejmenší světový oceán."),
			("de", "Der Arktische Ozean ist der kleinste Ozean der Erde."),
			("en", "The Arctic Ocean is the smallest of the world's oceans."),
			("es", "Océano Ártico más pequeña y más septentrional del planeta"),
			("fr", "L'océan Arctique ce qui en fait le plus petit des océans."),
			("it", "Il Mar Glaciale Artico è una massa d'acqua..."),
			("ru", "Се́верный Ледови́тый океа́н — наименьший по площади океан Земли")
		]

		for (lang, sample) in samples:
			# subTest lets the loop continue after a mismatch, so every
			# misidentified language is reported instead of only the first.
			with self.subTest(lang=lang):
				self.assertEqual(lang, identify(sample))
0 comments (0 inline, 0 general)