mothertoken

installation

$ pip install mothertoken

find the best model fit for a language

$ mothertoken rank ar

model id	chars/token	cost vs english
gpt-4o	3.132	1.56x ← most efficient
qwen2.5	2.585	1.84x
gpt-4	1.414	3.43x
mistral	1.104	3.85x

# language aliases work too: arabic, spanish, thai, eng_Latn

list built-in counter aliases

$ mothertoken list

id	name	example models	counter source
gpt-4o	OpenAI o200k_base	GPT-4o, GPT-4.1	tiktoken / o200k_base
qwen2.5	Qwen 2.5 tokenizer	Qwen 2.5	Hugging Face / Qwen/Qwen2.5-7B
mistral	Mistral 7B tokenizer	Mistral 7B	Hugging Face / mistralai/Mistral-7B-v0.1
gemma2	Gemma 2 tokenizer	Gemma 2	Hugging Face / google/gemma-2-9b

# these are starter aliases, not a complete model compatibility map

# local counters run on your machine

compare selected models

$ mothertoken compare "Travesura realizada" --model gpt-4o --model Qwen/Qwen3-0.6B --model mistralai/Mistral-7B-v0.1

$ mothertoken compare --file prompt.txt --model mistralai/Mistral-7B-v0.1 --model deepseek-ai/DeepSeek-V4-Pro

model	source	tokens	chars/token
OpenAI o200k_base	o200k_base	4	4.750
Qwen/Qwen3-0.6B	Qwen/Qwen3-0.6B	5	3.800
Mistral 7B	mistralai/Mistral-7B-v0.1	6	3.167

# use aliases from mothertoken list or a Hugging Face model path

# unknown one-word ids fail fast so typos do not become network lookups

tokenize exact text

 $ mothertoken tokenize "مرحبا بالعالم" --language ar --model gpt-4o 
 Model	Tokens	Chars/Token	English Est.	Vs English
 GPT-4o	5	2.600	3	1.56x
  
 $ mothertoken tokenize "Hello" --model Qwen/Qwen3-0.6B 
 $ mothertoken tokenize --file prompt.txt 
 $ mothertoken tokenize --file prompt.ar.txt --language ar --english-file prompt.en.txt 
 # local aliases and Hugging Face model paths are supported 
 # --language adds benchmark English-equivalent estimates 
 # --english-text or --english-file adds a paired translation comparison 

Model	Tokens	Chars/Token	English Est.	Vs English
GPT-4o	5	2.600	3	1.56x

researcher workflow

 $ mothertoken benchmark 
 $ mothertoken benchmark run --dry-run 
 $ mothertoken benchmark run --languages arb_Arab,spa_Latn --models gpt-4o,qwen2.5 
 $ mothertoken benchmark status 
 # benchmark shows help; benchmark run creates data 
 # without --output, benchmark run writes to the benchmark in the user config and makes it the active one
 # benchmark aliases and Hugging Face model paths are both supported 
 $