diff --git a/languedoc.py b/languedoc.py --- a/languedoc.py +++ b/languedoc.py @@ -123,13 +123,15 @@ def cross_validate(sample_sets): test_models = [partial_model] + [m for m in models if m.language != real_lang] for k in TEST_LENS: - j = random.randrange(0, len(test_text)-k) - t = test_text[j:j+k] - predicted_lang = identify(t, test_models) - print(real_lang, predicted_lang, t) - if predicted_lang == real_lang: - score += 1 - max_score += 1 + for i in range(10): + j = random.randrange(0, len(test_text)-k) + t = test_text[j:j+k] + predicted_lang = identify(t, test_models) + if predicted_lang == real_lang: + score += 1 + else: + print(real_lang, predicted_lang, t) + max_score += 1 return score / max_score, (score, max_score)