diff --git a/src/languedoc/__init__.py b/src/languedoc/__init__.py new file mode 100644 --- /dev/null +++ b/src/languedoc/__init__.py @@ -0,0 +1,1 @@ +from .predict import identify diff --git a/shared.py b/src/languedoc/predict.py rename from shared.py rename to src/languedoc/predict.py --- a/shared.py +++ b/src/languedoc/predict.py @@ -5,7 +5,7 @@ import json import gzip TOP_NGRAM_COUNT = 3000 -MODEL_PATH = os.path.join(os.path.dirname(__file__), "models.json.gz") +MODEL_PATH = os.path.join(os.path.dirname(__file__), "../../models.json.gz") def preprocess(text): diff --git a/languedoc.py b/src/languedoc/train.py rename from languedoc.py rename to src/languedoc/train.py --- a/languedoc.py +++ b/src/languedoc/train.py @@ -4,7 +4,7 @@ import itertools import json import gzip -from shared import preprocess, identify, extract_ngram_freqs, rank_ngram_freqs, Sample +from predict import preprocess, identify, extract_ngram_freqs, rank_ngram_freqs, Sample random.seed(19181028) @@ -72,9 +72,9 @@ def cross_validate(sample_sets): return score / max_score, (score, max_score) -DATA_DIR = os.path.join(os.path.dirname(__file__), "data") +DATA_DIR = os.path.join(os.path.dirname(__file__), "../../data") LANG_DIRS = sorted([x.path for x in os.scandir(DATA_DIR)]) -MODEL_PATH = os.path.join(os.path.dirname(__file__), "models.json.gz") +MODEL_PATH = os.path.join(os.path.dirname(__file__), "../../models.json.gz") if __name__ == "__main__": samples = []