# HG changeset patch
# User Laman
# Date 2022-09-28 22:56:06
# Node ID 3980aeb455b0a5a9c3a2f921cfeb3c0103fd83cf
# Parent 167aab0c3103cf9430d4dbee53d66b0cce0627fc
sorted input files

diff --git a/languedoc.py b/languedoc.py
--- a/languedoc.py
+++ b/languedoc.py
@@ -119,7 +119,6 @@ def cross_validate(sample_sets):
 
     for s in sample_sets:
         for (test_text, partial_model) in s.generate_tests():
-            partial_model.print_overview()
             real_lang = partial_model.language
             test_models = [partial_model] + [m for m in models if m.language != real_lang]
 
@@ -142,7 +141,7 @@ def identify(text, models):
 
 
 DATA_DIR = os.path.join(os.path.dirname(__file__), "data")
-LANG_DIRS = [x.path for x in os.scandir(DATA_DIR)]
+LANG_DIRS = sorted([x.path for x in os.scandir(DATA_DIR)])
 
 if __name__ == "__main__":
     samples = []
@@ -152,7 +151,7 @@ if __name__ == "__main__":
         lang_samples = SampleSet(lang)
         samples.append(lang_samples)
 
-        for file in os.scandir(d):
+        for file in sorted(os.scandir(d), key=lambda f: f.name):
             with open(file) as f:
                 text = f.read()
                 text = preprocess(text)