$ pip install mothertoken
$ mothertoken rank ar
tokenizer id  chars/token  cost vs english
gpt-4o        3.132        1.56x  ← most efficient
qwen2.5       2.585        1.84x
gpt-4         1.414        3.43x
mistral       1.104        3.85x
# aliases work too: arabic, spanish, thai, eng_Latn
$ mothertoken list
id             name                     used by          access  tokenizer
gpt-4o         OpenAI o200k_base        GPT-4o, GPT-4.1  local   tiktoken / o200k_base
qwen2.5        Qwen 2.5 tokenizer       Qwen 2.5         local   Hugging Face / Qwen/Qwen2.5-7B
claude-sonnet  Claude Sonnet tokenizer  Claude Sonnet    API     Anthropic / claude-sonnet-4-6
gemini-pro     Gemini Pro tokenizer     Gemini Pro       API     Google / gemini-1.5-pro
$ mothertoken list --local-only
# the benchmark measures tokenizers; model names are examples that help you pick the right id
$ mothertoken tokenize "مرحبا بالعالم" --language ar --model gpt-4o
Model   Tokens  Chars/Token  English Est.  Vs English
GPT-4o  5       2.600        3             1.56x
$ mothertoken tokenize --file prompt.txt
$ mothertoken tokenize "Hello" --include-api
$ mothertoken tokenize --file prompt.ar.txt --language ar --english-file prompt.en.txt
# local tokenizers run by default
# --include-api adds provider token counters when API keys are configured
# --language adds benchmark English-equivalent estimates
# --english-text or --english-file adds a paired translation comparison
$ mothertoken-benchmark --dry-run
$ mothertoken-benchmark --languages arb_Arab,spa_Latn --models gpt-4o,qwen2.5
# benchmark regeneration is documented separately from the everyday CLI