diff --git a/src/languedoc/predict.py b/src/languedoc/predict.py
--- a/src/languedoc/predict.py
+++ b/src/languedoc/predict.py
@@ -6,7 +6,10 @@ import gzip
 from typing import Union
 
 TOP_NGRAM_COUNT = 3000
+SINGLE_SEGMENT_LENGTH = 1000  # segment length used by Sample.extract when sampling the raw input text
+SINGLE_SAMPLE_LENGTH = 32  # segment length used by Sample.extract when re-sampling the preprocessed text
 MODEL_PATH = os.path.join(os.path.dirname(__file__), "models.json.gz")
+MODEL = []  # cache of the default models; identify() fills it from MODEL_PATH when called without models
 
 
 def preprocess(text: str) -> str:
@@ -15,6 +18,25 @@ def preprocess(text: str) -> str:
 	return text.lower()
 
 
+def sample_text(text: str, segment_length: int) -> str:
+	"""Extract a reasonably and uniformly long sample from a long text.
+
+	:param text: the input text
+	:param segment_length: minimal length of one segment; each segment is extended up to the next whitespace
+	:return: the whole text if it is shorter than 4 segments, otherwise up to three segments joined by spaces
+	"""
+	n = len(text)
+	# a text too short to sample
+	if n < 4*segment_length:
+		return text
+	# re.DOTALL lets a segment span line breaks; otherwise only lines of segment_length+ characters could match
+	regexp = re.compile(fr"\s(.{{{segment_length}}}.*?)\s", re.DOTALL)
+	# start searching half a segment before the 1st, 2nd and 3rd quarter mark, to get a representative sample
+	starts = [n*i//4 - segment_length//2 for i in range(1, 4)]
+	matches = [regexp.search(text, start) for start in starts]
+	return " ".join(m.group(1) for m in matches if m)
+
+
 def extract_kgram_counts(text: str, k: int) -> dict[str, int]:
 	"""Extract k-gram counts from the text for a provided k.
 
@@ -75,11 +97,14 @@ class Sample:
 
 	@classmethod
 	def extract(cls, text: str, language="??") -> "Sample":
-		"""Create a new Sample by extracting it from text.
+		"""Create a new Sample by extracting ranked ngrams from a sample of the text.
 
 		:param text: a string, from which to extract the ngrams into a Sample
 		:param language: a two letter language code if it is known (cs|de|en|...)"""
-		return cls(language, extract_ranked_ngrams(preprocess(text)))
+		preprocessed_text = preprocess(sample_text(text, SINGLE_SEGMENT_LENGTH))  # sample the raw text, then preprocess only that sample
+		sample = sample_text(preprocessed_text, SINGLE_SAMPLE_LENGTH)  # sample again, with the shorter segment length
+
+		return cls(language, extract_ranked_ngrams(sample))
 
 	@classmethod
 	def load(cls, exported: dict) -> "Sample":
@@ -103,7 +128,7 @@ class Sample:
 
 		The method is asymmetric. You are supposed to use sample.compare(model), not model.compare(sample).
 
-		:param other: a reference model in known language"""
+		:param other: a reference model in a known language"""
 		m = len(other.ranked_ngrams)
 
 		res = sum(
@@ -126,8 +151,11 @@ def identify(text: str, models=[]) -> st
 	:param text: the text to identify
 	:param models: list of models to choose from. The default is loaded from MODEL_PATH
 	:return: best matching language (cs, de, en, ...)"""
+	global MODEL
+	if not MODEL and not models:
+		MODEL = load_models(MODEL_PATH)
 	if not models:
-		models = load_models(MODEL_PATH)
+		models = MODEL
 
 	sample = Sample.extract(text)