diff --git a/src/languedoc/predict.py b/src/languedoc/predict.py
--- a/src/languedoc/predict.py
+++ b/src/languedoc/predict.py
@@ -19,9 +19,17 @@ def preprocess(text: str) -> str:
 
 
 def sample_text(text: str, segment_length: int):
+	"""Extract a sample of reasonable, uniform length from a long text.
+
+	:param text: the input text
+	:param segment_length: the length of a single text segment. The whole sample is going to be 3-4 times as long.
+	:return: the whole text if it is shorter than 4*segment_length, otherwise a sample cut from it, consisting of three segments
+	"""
 	n = len(text)
+	# a text too short to sample
 	if n < 4*segment_length:
 		return text
+	# take a segment from the 1st, 2nd and 3rd quarter of the text, to get a representative sample
 	else:
 		f = lambda i: n*i//4 - segment_length//2
 		regexp = re.compile(fr"\s(.{{{segment_length}}}.*?)\s")
@@ -89,7 +97,7 @@ class Sample:
 
 	@classmethod
 	def extract(cls, text: str, language="??") -> "Sample":
-		"""Create a new Sample by extracting it from text.
+		"""Create a new Sample by extracting it from the text.
 
 		:param text: a string, from which to extract the ngrams into a Sample
 		:param language: a two letter language code if it is known (cs|de|en|...)"""
@@ -120,7 +128,7 @@ class Sample:
 
 		The method is asymmetric. You are supposed to use sample.compare(model), not model.compare(sample).
 
-		:param other: a reference model in known language"""
+		:param other: a reference model in a known language"""
 		m = len(other.ranked_ngrams)
 
 		res = sum(