2023-03-11 12:04:29 +03:00
|
|
|
|
<?php
|
|
|
|
|
|
|
|
|
|
if (!defined('BB_ROOT')) die(basename(__FILE__));
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Automatic correction of the language for words in the text because of the wrong keyboard layout
|
|
|
|
|
* Автоматическое исправление языка для слов в тексте из-за неправильной раскладки клавиатуры
|
|
|
|
|
*
|
|
|
|
|
* Purpose
|
|
|
|
|
* * Correcting search queries
* * Correcting existing and new texts published by visitors on websites.
|
|
|
|
|
*
|
|
|
|
|
* Features
|
|
|
|
|
* * SIMILAR_CHARS mode. Corrects mistyped letters in words that look
* identical in different keyboard layouts. Inconspicuous Latin letters among Russian ones
* are corrected to Russian and vice versa. The algorithm is fairly reliable and fast.
|
|
|
|
|
* * KEYBOARD_LAYOUT mode. Corrects words typed by mistake in the wrong keyboard layout.
* N-grams are used to detect the language. The algorithm can occasionally be wrong and
* is several times slower than SIMILAR_CHARS. The algorithm is continually being improved.
* To maintain quality there is a test set of words, which is not included in the distribution.
|
|
|
|
|
* * Two-way word correction between Russian and English.
* * Correction of mixed-language words.
* * Character encoding: UTF-8.
* * The class works without the mbstring and iconv extensions!
|
|
|
|
|
*
|
|
|
|
|
* Examples
|
|
|
|
|
* "\xd1\x81\xd0\xbesm\xd0\xbe" => 'cosmo' (2 первых и последняя буква — ошибочные)
|
|
|
|
|
* "\x78\x70\x65н" => 'хрен' (первые 3 буквы — ошибочные)
|
|
|
|
|
* "вебvfcnth" => 'вебмастер'
|
|
|
|
|
* "webьфыеук" => 'webmaster'
|
|
|
|
|
* "цццюмуыеш.ru" => 'www.vesti.ru'
|
|
|
|
|
* "\x54.\x43.\x48\x61вка" => 'Т.С.Навка'
|
|
|
|
|
*
|
|
|
|
|
* Hints
|
|
|
|
|
* A typical workflow for an input field with autocompletion:
* 1. Run the lookup with the original query;
* 2. If there is a result, return it together with the original query;
* 3. Otherwise correct the original query with Text_LangCorrect;
* 4. If the original and corrected queries are the same, return an empty result and the original query;
* 5. Otherwise run the lookup with the corrected query;
* 6. Return the result. If it is not empty, return the corrected query, otherwise the original one.
|
|
|
|
|
*
|
|
|
|
|
* License
|
|
|
|
|
* For non-commercial use only!
|
|
|
|
|
*
|
|
|
|
|
* @link http://code.google.com/p/php-lang-correct/
|
|
|
|
|
* @license http://creativecommons.org/licenses/by-nc-sa/3.0/
|
|
|
|
|
* @author Nasibullin Rinat
|
|
|
|
|
* @version 1.4.3
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
class Text_LangCorrect
|
|
|
|
|
{
|
|
|
|
|
/**
|
|
|
|
|
* Flag for correcting mistyped letters in words
* that look identical in different keyboard layouts.
* The algorithm is fairly reliable and fast.
|
|
|
|
|
*/
|
|
|
|
|
const SIMILAR_CHARS = 1;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Flag for correcting words typed by mistake in the wrong keyboard layout.
* The algorithm can occasionally be wrong and is several times slower than SIMILAR_CHARS.
|
|
|
|
|
*/
|
|
|
|
|
const KEYBOARD_LAYOUT = 2;
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Flag for adding the corrections, if the KEYBOARD_LAYOUT flag is enabled
* Syntax and example: "(,.cn=>бюст)"
|
|
|
|
|
* ^ ^^ ^
|
|
|
|
|
*/
|
|
|
|
|
const ADD_FIX = 4;
|
|
|
|
|
|
|
|
|
|
#English letters (all)
|
|
|
|
|
private $en = '[a-zA-Z]';
|
|
|
|
|
|
|
|
|
|
#English letters (uppercase)
|
|
|
|
|
private $en_uc = '[A-Z]';
|
|
|
|
|
|
|
|
|
|
#English letters + characters that may have been typed by mistake in the English keyboard layout instead of Russian letters (all)
|
|
|
|
|
private $en_sc = '[a-zA-Z\'`~<>,.:;{}\[\]"]';
|
|
|
|
|
|
|
|
|
|
#characters that may have been typed by mistake in the English keyboard layout instead of Russian letters
|
|
|
|
|
private $sc = '[\'`~<>,.:;{}\[\]"]';
|
|
|
|
|
private $no_sc = '[^\'`~<>,.:;{}\[\]"]';
|
|
|
|
|
|
|
|
|
|
#Russian + Tatar letters (all)
|
|
|
|
|
private $tt = '[\xd0-\xd3][\x80-\xbf]
|
|
|
|
|
(?<=\xd0[\x90-\xbf\x81]|\xd1[\x80-\x8f\x91]|\xd2[\x96\x97\xa2\xa3\xae\xaf\xba\xbb]|\xd3[\x98\x99\xa8\xa9])';
|
|
|
|
|
|
|
|
|
|
#Russian + Tatar letters (uppercase)
|
|
|
|
|
private $tt_uc = '[\xd0\xd2\xd3][\x81-\xba]
|
|
|
|
|
(?<=\xd0[\x90-\xaf\x81]|\xd2[\x96\xa2\xae\xba]|\xd3[\x98\xa8])';
|
|
|
|
|
|
|
|
|
|
#Russian + Tatar letters (for pre-filtered texts) (all)
|
|
|
|
|
private $tt_f = '[\xd0-\xd3][\x80-\xbf]
|
|
|
|
|
#комментируем для увеличения скорости, т.к. остальные символы отфильтрованы
|
|
|
|
|
#(?<=\xd0[\x90-\xbf\x81]|\xd1[\x80-\x8f\x91]|\xd2[\x96\x97\xa2\xa3\xae\xaf\xba\xbb]|\xd3[\x98\x99\xa8\xa9])
|
|
|
|
|
';
|
|
|
|
|
|
|
|
|
|
#vowels (lowercase)
|
|
|
|
|
private $vowel_lc = array(
|
|
|
|
|
'tt' => '\xd0[\xb0\xb5\xb8\xbe]|\xd1[\x83\x8b\x8d\x8e\x8f\x91] #аеиоуыэюяё (гласные, 10 шт.)
|
|
|
|
|
#| \xd0[\x90\x95\x98\x9e\xa3\xab\xad\xae\xaf\x81] #АЕИОУЫЭЮЯЁ (гласные, 10 шт.)
|
|
|
|
|
',
|
|
|
|
|
'en' => '[aeiouy]', #латинских 6 шт.
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
#consonants + the Russian sign letters (ъ, ь) (lowercase)
|
|
|
|
|
private $consonant_lc = array(
|
|
|
|
|
'tt' => '\xd0[\xb1-\xb4\xb6\xb7\xb9\xba-\xbd\xbf]|\xd1[\x80\x81\x82\x84-\x89\x8a\x8c] #бвгджзйклмнпрстфхцчшщ ъь (согласные, 21+2 шт.)
|
|
|
|
|
#| \xd0[\x91-\x94\x96\x97\x99\x9a-\x9d\x9f-\xa2\xa4-\xa9\xaa\xac] #БВГДЖЗЙКЛМНПРСТФХЦЧШЩ ЪЬ (согласные, 21+2 шт.)
|
|
|
|
|
',
|
|
|
|
|
'en' => '[bcdfghjklmnpqrstvwxz]', #латинских 20 шт.
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
private $words_exceptions = array(
|
|
|
|
|
'tt' => array(
|
|
|
|
|
'трлн' => null,
|
|
|
|
|
'ющенко' => null,
|
|
|
|
|
'мебельград' => null,
|
|
|
|
|
'дэнис' => null,
|
|
|
|
|
),
|
|
|
|
|
'en' => array(
|
|
|
|
|
'heuer' => null,
|
|
|
|
|
),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
#Russian letters that look like English ones (uppercase)
|
|
|
|
|
private $ru_similar_uc = "\xd0[\x90\x92\x95\x9a\x9c\x9d\x9e\xa0-\xa3\xa5]";
|
|
|
|
|
|
|
|
|
|
#Russian letters that look like English ones (all)
|
|
|
|
|
private $ru_similar = "\xd0[\x90\x92\x95\x9a\x9c\x9d\x9e\xa0-\xa3\xa5\xb0\xb5\xbe]|\xd1[\x80\x81\x83\x85]";
|
|
|
|
|
|
|
|
|
|
#English letters that look like Russian ones (uppercase)
|
|
|
|
|
private $en_similar_uc = '[ABEKMHOPCTYX]';
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
#$tt_fake = '\xd0[\xb0\xb5\xbe\x90\x92\x95\x9a\x9c\x9d\x9e\xa0\xa1\xa2\xa3\xa5]|\xd1[\x80\x81\x83\x85]';
|
|
|
|
|
$tt_fake = '[\xd0\xd1][\x80-\xbe]
|
|
|
|
|
(?<=\xd0[\xb0\xb5\xbe\x90\x92\x95\x9a\x9c\x9d\x9e\xa0\xa1\xa2\xa3\xa5]|\xd1[\x80\x81\x83\x85])';
|
|
|
|
|
$en_fake = '[aeopcyxABEKMHOPCTYX]';
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
#unique Russian letters (those without a similar-looking English letter)
|
|
|
|
|
/*
|
|
|
|
|
CASE_UPPER, case_lower
|
|
|
|
|
"\xd0\x81", "\xd1\x91", #Ё ё
|
|
|
|
|
"\xd0\x91", "\xd0\xb1", #Б б
|
|
|
|
|
"\xd0\x92", "\xd0\xb2", #В в
|
|
|
|
|
"\xd0\x93", "\xd0\xb3", #Г г
|
|
|
|
|
"\xd0\x94", "\xd0\xb4", #Д д
|
|
|
|
|
"\xd0\x96", "\xd0\xb6", #Ж ж
|
|
|
|
|
"\xd0\x97", "\xd0\xb7", #З з
|
|
|
|
|
"\xd0\x98", "\xd0\xb8", #И и
|
|
|
|
|
"\xd0\x99", "\xd0\xb9", #Й й
|
|
|
|
|
"\xd0\xba", #К к
|
|
|
|
|
"\xd0\x9b", "\xd0\xbb", #Л л
|
|
|
|
|
"\xd0\xbd", #Н н
|
|
|
|
|
"\xd0\x9f", "\xd0\xbf", #П п
|
|
|
|
|
"\xd1\x82", #Т т
|
|
|
|
|
"\xd0\xa4", "\xd1\x84", #Ф ф
|
|
|
|
|
"\xd0\xa6", "\xd1\x86", #Ц ц
|
|
|
|
|
"\xd0\xa7", "\xd1\x87", #Ч ч
|
|
|
|
|
"\xd0\xa8", "\xd1\x88", #Ш ш
|
|
|
|
|
"\xd0\xa9", "\xd1\x89", #Щ щ
|
|
|
|
|
"\xd0\xaa", "\xd1\x8a", #Ъ ъ
|
|
|
|
|
"\xd0\xab", "\xd1\x8b", #Ы ы
|
|
|
|
|
"\xd0\xac", "\xd1\x8c", #Ь ь
|
|
|
|
|
"\xd0\xad", "\xd1\x8d", #Э э
|
|
|
|
|
"\xd0\xae", "\xd1\x8e", #Ю ю
|
|
|
|
|
"\xd0\xaf", "\xd1\x8f", #Я я
|
|
|
|
|
*/
|
|
|
|
|
#$tt_uniq = "\xd0[\xb1-\xb4\xb6-\xbb\xbd\xbf\x81\x91-\x94\x96-\x99\x9b\x9f\xa4\xa6-\xaf]|\xd1[\x82\x84\x86-\x8f\x91]";
|
|
|
|
|
#Pattern fragment for the "unique" Russian letters listed in the table above.
#The two leading byte classes are a coarse prefilter over the two-byte UTF-8 sequence;
#the look-behind then accepts only the exact sequences of the unique letters.
#The prefilter's second class starts at \x81 (not \x82): "Ё" is encoded as \xd0\x81 and is
#listed in the look-behind, so a lower bound of \x82 made it impossible to match.
private $tt_uniq = "[\xd0\xd1][\x81-\xbf]
(?<=\xd0[\xb1-\xb4\xb6-\xbb\xbd\xbf\x81\x91-\x94\x96-\x99\x9b\x9f\xa4\xa6-\xaf]|\xd1[\x82\x84\x86-\x8f\x91])";
|
|
|
|
|
|
|
|
|
|
#unique Latin letters (those without a similar-looking Russian letter)
|
|
|
|
|
/*
|
|
|
|
|
CASE_UPPER, case_lower
|
|
|
|
|
"\x42", "\x62", #B b
|
|
|
|
|
"\x44", "\x64", #D d
|
|
|
|
|
"\x46", "\x66", #F f
|
|
|
|
|
"\x68", #H h
|
|
|
|
|
"\x49", "\x69", #I i
|
|
|
|
|
"\x4a", "\x6a", #J j
|
|
|
|
|
"\x6b", #K k
|
|
|
|
|
"\x4c", "\x6c", #L l
|
|
|
|
|
"\x6d", #M m
|
|
|
|
|
"\x4e", "\x6e", #N n
|
|
|
|
|
"\x51", "\x71", #Q q
|
|
|
|
|
"\x52", "\x72", #R r
|
|
|
|
|
"\x53", "\x73", #S s
|
|
|
|
|
"\x74", #T t
|
|
|
|
|
"\x55", "\x75", #U u
|
|
|
|
|
"\x56", "\x76", #V v
|
|
|
|
|
"\x57", "\x77", #W w
|
|
|
|
|
"\x5a", "\x7a", #Z z
|
|
|
|
|
*/
|
|
|
|
|
private $en_uniq = "[\x42\x44\x46\x49\x4a\x4c\x4e\x51\x52\x53\x55\x57\x56\x5a\x62\x64\x66\x68\x69\x6a-\x6e\x71-\x77\x7a]";
|
|
|
|
|
|
|
|
|
|
private $table_flip; #array
|
|
|
|
|
private $words; #corrected words
|
|
|
|
|
private $en_correct; #string
|
|
|
|
|
private $tt_correct; #string
|
|
|
|
|
private $mode; #bool
|
|
|
|
|
|
|
|
|
|
private $is_flip = false;
|
|
|
|
|
private $method = 0;
|
|
|
|
|
|
|
|
|
|
private $table = array(
|
|
|
|
|
#method 0: correction table for mistyped letters that look identical (Russian <--> English)
|
|
|
|
|
0 => array(
|
|
|
|
|
#lowercase #UPPERCASE
|
|
|
|
|
"\xd0\xb0" => 'a', "\xd0\x90" => 'A',
|
|
|
|
|
"\xd0\x92" => 'B',
|
|
|
|
|
"\xd0\xb5" => 'e', "\xd0\x95" => 'E',
|
|
|
|
|
"\xd0\x9a" => 'K',
|
|
|
|
|
"\xd0\x9c" => 'M',
|
|
|
|
|
"\xd0\x9d" => 'H',
|
|
|
|
|
"\xd0\xbe" => 'o', "\xd0\x9e" => 'O',
|
|
|
|
|
"\xd1\x80" => 'p', "\xd0\xa0" => 'P',
|
|
|
|
|
"\xd1\x81" => 'c', "\xd0\xa1" => 'C',
|
|
|
|
|
"\xd0\xa2" => 'T',
|
|
|
|
|
"\xd1\x83" => 'y', "\xd0\xa3" => 'Y',
|
|
|
|
|
"\xd1\x85" => 'x', "\xd0\xa5" => 'X',
|
|
|
|
|
),
|
|
|
|
|
#method 1: correction table for letters typed by mistake in the other keyboard layout (Russian <--> English)
|
|
|
|
|
1 => array(
|
|
|
|
|
#CASE_UPPER #case_lower
|
|
|
|
|
"\xd0\x81" => '~', "\xd1\x91" => '`', #Ё ё
|
|
|
|
|
"\xd0\x90" => 'F', "\xd0\xb0" => 'f', #А а
|
|
|
|
|
"\xd0\x91" => '<', "\xd0\xb1" => ',', #Б б
|
|
|
|
|
"\xd0\x92" => 'D', "\xd0\xb2" => 'd', #В в
|
|
|
|
|
"\xd0\x93" => 'U', "\xd0\xb3" => 'u', #Г г
|
|
|
|
|
"\xd0\x94" => 'L', "\xd0\xb4" => 'l', #Д д
|
|
|
|
|
"\xd0\x95" => 'T', "\xd0\xb5" => 't', #Е е
|
|
|
|
|
"\xd0\x96" => ':', "\xd0\xb6" => ';', #Ж ж
|
|
|
|
|
"\xd0\x97" => 'P', "\xd0\xb7" => 'p', #З з
|
|
|
|
|
"\xd0\x98" => 'B', "\xd0\xb8" => 'b', #И и
|
|
|
|
|
"\xd0\x99" => 'Q', "\xd0\xb9" => 'q', #Й й
|
|
|
|
|
"\xd0\x9a" => 'R', "\xd0\xba" => 'r', #К к
|
|
|
|
|
"\xd0\x9b" => 'K', "\xd0\xbb" => 'k', #Л л
|
|
|
|
|
"\xd0\x9c" => 'V', "\xd0\xbc" => 'v', #М м
|
|
|
|
|
"\xd0\x9d" => 'Y', "\xd0\xbd" => 'y', #Н н
|
|
|
|
|
"\xd0\x9e" => 'J', "\xd0\xbe" => 'j', #О о
|
|
|
|
|
"\xd0\x9f" => 'G', "\xd0\xbf" => 'g', #П п
|
|
|
|
|
#CASE_UPPER #case_lower
|
|
|
|
|
"\xd0\xa0" => 'H', "\xd1\x80" => 'h', #Р р
|
|
|
|
|
"\xd0\xa1" => 'C', "\xd1\x81" => 'c', #С с
|
|
|
|
|
"\xd0\xa2" => 'N', "\xd1\x82" => 'n', #Т т
|
|
|
|
|
"\xd0\xa3" => 'E', "\xd1\x83" => 'e', #У у
|
|
|
|
|
"\xd0\xa4" => 'A', "\xd1\x84" => 'a', #Ф ф
|
|
|
|
|
"\xd0\xa5" => '{', "\xd1\x85" => '[', #Х х
|
|
|
|
|
"\xd0\xa6" => 'W', "\xd1\x86" => 'w', #Ц ц
|
|
|
|
|
"\xd0\xa7" => 'X', "\xd1\x87" => 'x', #Ч ч
|
|
|
|
|
"\xd0\xa8" => 'I', "\xd1\x88" => 'i', #Ш ш
|
|
|
|
|
"\xd0\xa9" => 'O', "\xd1\x89" => 'o', #Щ щ
|
|
|
|
|
"\xd0\xaa" => '}', "\xd1\x8a" => ']', #Ъ ъ
|
|
|
|
|
"\xd0\xab" => 'S', "\xd1\x8b" => 's', #Ы ы
|
|
|
|
|
"\xd0\xac" => 'M', "\xd1\x8c" => 'm', #Ь ь
|
|
|
|
|
"\xd0\xad" => '"', "\xd1\x8d" => "'", #Э э
|
|
|
|
|
"\xd0\xae" => '>', "\xd1\x8e" => '.', #Ю ю
|
|
|
|
|
"\xd0\xaf" => 'Z', "\xd1\x8f" => 'z', #Я я
|
|
|
|
|
),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
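#3-letter vowel N-grams (lowercase), keyed by language
#NOTE(review): the original comment called these "non-existent", yet entries such as 'you' and 'eye' occur in real words; presumably this lists sequences that DO occur. Confirm against the code that reads it.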
|
|
|
|
|
private $vowels3_lc = array(
|
|
|
|
|
'en' => array(
|
|
|
|
|
'aea' => 0,
|
|
|
|
|
'aei' => 1,
|
|
|
|
|
'aeo' => 2,
|
|
|
|
|
'aeu' => 3,
|
|
|
|
|
'aia' => 4,
|
|
|
|
|
'aie' => 5,
|
|
|
|
|
'aii' => 6,
|
|
|
|
|
'aoi' => 7,
|
|
|
|
|
'aou' => 8,
|
|
|
|
|
'aue' => 9,
|
|
|
|
|
'aya' => 10,
|
|
|
|
|
'aye' => 11,
|
|
|
|
|
'ayi' => 12,
|
|
|
|
|
'ayo' => 13,
|
|
|
|
|
'ayu' => 14,
|
|
|
|
|
'eae' => 15,
|
|
|
|
|
'eau' => 16,
|
|
|
|
|
'eea' => 17,
|
|
|
|
|
'eei' => 18,
|
|
|
|
|
'eeu' => 19,
|
|
|
|
|
'eia' => 20,
|
|
|
|
|
'eiu' => 21,
|
|
|
|
|
'eoi' => 22,
|
|
|
|
|
'eou' => 23,
|
|
|
|
|
'eya' => 24,
|
|
|
|
|
'eye' => 25,
|
|
|
|
|
'eyi' => 26,
|
|
|
|
|
'eyo' => 27,
|
|
|
|
|
'iae' => 28,
|
|
|
|
|
'iai' => 29,
|
|
|
|
|
'iao' => 30,
|
|
|
|
|
'iau' => 31,
|
|
|
|
|
'iei' => 32,
|
|
|
|
|
'ieu' => 33,
|
|
|
|
|
'ioa' => 34,
|
|
|
|
|
'ioe' => 35,
|
|
|
|
|
'iou' => 36,
|
|
|
|
|
'iya' => 37,
|
|
|
|
|
'oae' => 38,
|
|
|
|
|
'oea' => 39,
|
|
|
|
|
'oei' => 40,
|
|
|
|
|
'oeo' => 41,
|
|
|
|
|
'oeu' => 42,
|
|
|
|
|
'oey' => 43,
|
|
|
|
|
'oia' => 44,
|
|
|
|
|
'oie' => 45,
|
|
|
|
|
'ooe' => 46,
|
|
|
|
|
'ooi' => 47,
|
|
|
|
|
'oou' => 48,
|
|
|
|
|
'oua' => 49,
|
|
|
|
|
'oue' => 50,
|
|
|
|
|
'oui' => 51,
|
|
|
|
|
'oya' => 52,
|
|
|
|
|
'oye' => 53,
|
|
|
|
|
'oyi' => 54,
|
|
|
|
|
'oyo' => 55,
|
|
|
|
|
'uae' => 56,
|
|
|
|
|
'uai' => 57,
|
|
|
|
|
'uay' => 58,
|
|
|
|
|
'uea' => 59,
|
|
|
|
|
'uee' => 60,
|
|
|
|
|
'uei' => 61,
|
|
|
|
|
'ueo' => 62,
|
|
|
|
|
'ueu' => 63,
|
|
|
|
|
'uey' => 64,
|
|
|
|
|
'uia' => 65,
|
|
|
|
|
'uie' => 66,
|
|
|
|
|
'uio' => 67,
|
|
|
|
|
'uiu' => 68,
|
|
|
|
|
'uoa' => 69,
|
|
|
|
|
'uoi' => 70,
|
|
|
|
|
'uou' => 71,
|
|
|
|
|
'uoy' => 72,
|
|
|
|
|
'uya' => 73,
|
|
|
|
|
'uye' => 74,
|
|
|
|
|
'uyi' => 75,
|
|
|
|
|
'yae' => 76,
|
|
|
|
|
'yao' => 77,
|
|
|
|
|
'yau' => 78,
|
|
|
|
|
'yea' => 79,
|
|
|
|
|
'yei' => 80,
|
|
|
|
|
'yeo' => 81,
|
|
|
|
|
'yey' => 82,
|
|
|
|
|
'yie' => 83,
|
|
|
|
|
'yoi' => 84,
|
|
|
|
|
'you' => 85,
|
|
|
|
|
'yoy' => 86,
|
|
|
|
|
'yua' => 87,
|
|
|
|
|
),
|
|
|
|
|
'tt' => array(
|
|
|
|
|
'аау' => 0,
|
|
|
|
|
'аео' => 1,
|
|
|
|
|
'аеу' => 2,
|
|
|
|
|
'аиа' => 3,
|
|
|
|
|
'аио' => 4,
|
|
|
|
|
'аиу' => 5,
|
|
|
|
|
'аои' => 6,
|
|
|
|
|
'ауэ' => 7,
|
|
|
|
|
'аяя' => 8,
|
|
|
|
|
'еаэ' => 9,
|
|
|
|
|
'еее' => 10,
|
|
|
|
|
'еео' => 11,
|
|
|
|
|
'еоа' => 12,
|
|
|
|
|
'еои' => 13,
|
|
|
|
|
'еоо' => 14,
|
|
|
|
|
'еую' => 15,
|
|
|
|
|
'еуя' => 16,
|
|
|
|
|
'еуё' => 17,
|
|
|
|
|
'иау' => 18,
|
|
|
|
|
'иео' => 19,
|
|
|
|
|
'иие' => 20,
|
|
|
|
|
'иоа' => 21,
|
|
|
|
|
'иои' => 22,
|
|
|
|
|
'иоу' => 23,
|
|
|
|
|
'иоэ' => 24,
|
|
|
|
|
'ияе' => 25,
|
|
|
|
|
'ияи' => 26,
|
|
|
|
|
'ияю' => 27,
|
|
|
|
|
'оаэ' => 28,
|
|
|
|
|
'оео' => 29,
|
|
|
|
|
'оею' => 30,
|
|
|
|
|
'оие' => 31,
|
|
|
|
|
'оуе' => 32,
|
|
|
|
|
'оуя' => 33,
|
|
|
|
|
'оюе' => 34,
|
|
|
|
|
'оюю' => 35,
|
|
|
|
|
'ояе' => 36,
|
|
|
|
|
'уео' => 37,
|
|
|
|
|
'уюю' => 38,
|
|
|
|
|
),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
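#4-letter consonant N-grams (lowercase), keyed by language
#NOTE(review): the original comment called these "non-existent", yet entries such as 'ngth' and 'ghts' occur in real words; presumably this lists sequences that DO occur. Confirm against the code that reads it.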
|
|
|
|
|
private $consonants4_lc = array(
|
|
|
|
|
'en' => array(
|
|
|
|
|
'bldg' => 0,
|
|
|
|
|
'blvd' => 1,
|
|
|
|
|
'bscr' => 2,
|
|
|
|
|
'bstr' => 3,
|
|
|
|
|
'cbcm' => 4,
|
|
|
|
|
'cbft' => 5,
|
|
|
|
|
'chfr' => 6,
|
|
|
|
|
'chmn' => 7,
|
|
|
|
|
'chsc' => 8,
|
|
|
|
|
'chsh' => 9,
|
|
|
|
|
'chst' => 10,
|
|
|
|
|
'chth' => 11,
|
|
|
|
|
'chts' => 12,
|
|
|
|
|
'ckbr' => 13,
|
|
|
|
|
'ckch' => 14,
|
|
|
|
|
'ckcl' => 15,
|
|
|
|
|
'ckdr' => 16,
|
|
|
|
|
'ckgr' => 17,
|
|
|
|
|
'cksc' => 18,
|
|
|
|
|
'cksf' => 19,
|
|
|
|
|
'cksh' => 20,
|
|
|
|
|
'cksk' => 21,
|
|
|
|
|
'cksl' => 22,
|
|
|
|
|
'cksm' => 23,
|
|
|
|
|
'cksn' => 24,
|
|
|
|
|
'cksp' => 25,
|
|
|
|
|
'ckst' => 26,
|
|
|
|
|
'cksw' => 27,
|
|
|
|
|
'ckth' => 28,
|
|
|
|
|
'cktr' => 29,
|
|
|
|
|
'ckwh' => 30,
|
|
|
|
|
'cmps' => 31,
|
|
|
|
|
'dspr' => 32,
|
|
|
|
|
'dstr' => 33,
|
|
|
|
|
'dthw' => 34,
|
|
|
|
|
'ffsc' => 35,
|
|
|
|
|
'ffsh' => 36,
|
|
|
|
|
'ffsp' => 37,
|
|
|
|
|
'fthl' => 38,
|
|
|
|
|
'ftsm' => 39,
|
|
|
|
|
'ftsp' => 40,
|
|
|
|
|
'gdns' => 41,
|
|
|
|
|
'ghbr' => 42,
|
|
|
|
|
'ghfl' => 43,
|
|
|
|
|
'ghsh' => 44,
|
|
|
|
|
'ghtb' => 45,
|
|
|
|
|
'ghtc' => 46,
|
|
|
|
|
'ghtf' => 47,
|
|
|
|
|
'ghth' => 48,
|
|
|
|
|
'ghtj' => 49,
|
|
|
|
|
'ghtl' => 50,
|
|
|
|
|
'ghtm' => 51,
|
|
|
|
|
'ghtn' => 52,
|
|
|
|
|
'ghtr' => 53,
|
|
|
|
|
'ghts' => 54,
|
|
|
|
|
'ghtw' => 55,
|
|
|
|
|
'hdbk' => 56,
|
|
|
|
|
'hnst' => 57,
|
|
|
|
|
'jctn' => 58,
|
|
|
|
|
'khsh' => 59,
|
|
|
|
|
'khst' => 60,
|
|
|
|
|
'lchr' => 61,
|
|
|
|
|
'ldpr' => 62,
|
|
|
|
|
'ldsh' => 63,
|
|
|
|
|
'ldsm' => 64,
|
|
|
|
|
'ldsp' => 65,
|
|
|
|
|
'ldst' => 66,
|
|
|
|
|
'lfsk' => 67,
|
|
|
|
|
'lfth' => 68,
|
|
|
|
|
'lgth' => 69,
|
|
|
|
|
'llfl' => 70,
|
|
|
|
|
'llfr' => 71,
|
|
|
|
|
'llph' => 72,
|
|
|
|
|
'llpl' => 73,
|
|
|
|
|
'llsh' => 74,
|
|
|
|
|
'llsp' => 75,
|
|
|
|
|
'llst' => 76,
|
|
|
|
|
'lltr' => 77,
|
|
|
|
|
'llwr' => 78,
|
|
|
|
|
'lmcr' => 79,
|
|
|
|
|
'lmsm' => 80,
|
|
|
|
|
'lnrk' => 81,
|
|
|
|
|
'lnsh' => 82,
|
|
|
|
|
'lptr' => 83,
|
|
|
|
|
'lsgr' => 84,
|
|
|
|
|
'lshm' => 85,
|
|
|
|
|
'lshw' => 86,
|
|
|
|
|
'lstr' => 87,
|
|
|
|
|
'lthf' => 88,
|
|
|
|
|
'ltsf' => 89,
|
|
|
|
|
'ltsh' => 90,
|
|
|
|
|
'ltst' => 91,
|
|
|
|
|
'mbsc' => 92,
|
|
|
|
|
'mbsh' => 93,
|
|
|
|
|
'mbsk' => 94,
|
|
|
|
|
'mbst' => 95,
|
|
|
|
|
'mddx' => 96,
|
|
|
|
|
'mdnt' => 97,
|
|
|
|
|
'mpbl' => 98,
|
|
|
|
|
'mpgr' => 99,
|
|
|
|
|
'mphl' => 100,
|
|
|
|
|
'mphr' => 101,
|
|
|
|
|
'mpsh' => 102,
|
|
|
|
|
'mpst' => 103,
|
|
|
|
|
'mptl' => 104,
|
|
|
|
|
'mptn' => 105,
|
|
|
|
|
'mptr' => 106,
|
|
|
|
|
'mpts' => 107,
|
|
|
|
|
'mscr' => 108,
|
|
|
|
|
'mstr' => 109,
|
|
|
|
|
'nchb' => 110,
|
|
|
|
|
'nchl' => 111,
|
|
|
|
|
'nchm' => 112,
|
|
|
|
|
'nchn' => 113,
|
|
|
|
|
'nchp' => 114,
|
|
|
|
|
'nchr' => 115,
|
|
|
|
|
'nchw' => 116,
|
|
|
|
|
'nctl' => 117,
|
|
|
|
|
'nctn' => 118,
|
|
|
|
|
'ndbk' => 119,
|
|
|
|
|
'ndbr' => 120,
|
|
|
|
|
'ndch' => 121,
|
|
|
|
|
'ndfl' => 122,
|
|
|
|
|
'ndgl' => 123,
|
|
|
|
|
'ndgr' => 124,
|
|
|
|
|
'ndsc' => 125,
|
|
|
|
|
'ndsh' => 126,
|
|
|
|
|
'ndsl' => 127,
|
|
|
|
|
'ndsm' => 128,
|
|
|
|
|
'ndsp' => 129,
|
|
|
|
|
'ndst' => 130,
|
|
|
|
|
'ndsw' => 131,
|
|
|
|
|
'ndth' => 132,
|
|
|
|
|
'ndwr' => 133,
|
|
|
|
|
'ngcr' => 134,
|
|
|
|
|
'ngsg' => 135,
|
|
|
|
|
'ngsh' => 136,
|
|
|
|
|
'ngsm' => 137,
|
|
|
|
|
'ngsp' => 138,
|
|
|
|
|
'ngst' => 139,
|
|
|
|
|
'ngth' => 140,
|
|
|
|
|
'ngtz' => 141,
|
|
|
|
|
'nksg' => 142,
|
|
|
|
|
'nksh' => 143,
|
|
|
|
|
'nksm' => 144,
|
|
|
|
|
'nkst' => 145,
|
|
|
|
|
'nsch' => 146,
|
|
|
|
|
'nscr' => 147,
|
|
|
|
|
'nsgr' => 148,
|
|
|
|
|
'nshr' => 149,
|
|
|
|
|
'nskr' => 150,
|
|
|
|
|
'nspl' => 151,
|
|
|
|
|
'nspr' => 152,
|
|
|
|
|
'nssh' => 153,
|
|
|
|
|
'nstr' => 154,
|
|
|
|
|
'ntbr' => 155,
|
|
|
|
|
'nthl' => 156,
|
|
|
|
|
'nthr' => 157,
|
|
|
|
|
'nths' => 158,
|
|
|
|
|
'ntsh' => 159,
|
|
|
|
|
'ntsm' => 160,
|
|
|
|
|
'phth' => 161,
|
|
|
|
|
'pstr' => 162,
|
|
|
|
|
'pthr' => 163,
|
|
|
|
|
'pths' => 164,
|
|
|
|
|
'ptwr' => 165,
|
|
|
|
|
'rbst' => 166,
|
|
|
|
|
'rchb' => 167,
|
|
|
|
|
'rchd' => 168,
|
|
|
|
|
'rchl' => 169,
|
|
|
|
|
'rchm' => 170,
|
|
|
|
|
'rchn' => 171,
|
|
|
|
|
'rchp' => 172,
|
|
|
|
|
'rchw' => 173,
|
|
|
|
|
'rdsh' => 174,
|
|
|
|
|
'rdsm' => 175,
|
|
|
|
|
'rdst' => 176,
|
|
|
|
|
'rghs' => 177,
|
|
|
|
|
'rkpl' => 178,
|
|
|
|
|
'rksc' => 179,
|
|
|
|
|
'rksh' => 180,
|
|
|
|
|
'rksk' => 181,
|
|
|
|
|
'rksm' => 182,
|
|
|
|
|
'rksp' => 183,
|
|
|
|
|
'rkst' => 184,
|
|
|
|
|
'rldl' => 185,
|
|
|
|
|
'rldw' => 186,
|
|
|
|
|
'rlfr' => 187,
|
|
|
|
|
'rmch' => 188,
|
|
|
|
|
'rmst' => 189,
|
|
|
|
|
'rmth' => 190,
|
|
|
|
|
'rnbl' => 191,
|
|
|
|
|
'rndl' => 192,
|
|
|
|
|
'rnsk' => 193,
|
|
|
|
|
'rnsp' => 194,
|
|
|
|
|
'rnst' => 195,
|
|
|
|
|
'rsch' => 196,
|
|
|
|
|
'rscr' => 197,
|
|
|
|
|
'rshl' => 198,
|
|
|
|
|
'rshn' => 199,
|
|
|
|
|
'rspr' => 200,
|
|
|
|
|
'rstl' => 201,
|
|
|
|
|
'rstr' => 202,
|
|
|
|
|
'rsts' => 203,
|
|
|
|
|
'rstw' => 204,
|
|
|
|
|
'rtbr' => 205,
|
|
|
|
|
'rtch' => 206,
|
|
|
|
|
'rtcr' => 207,
|
|
|
|
|
'rthb' => 208,
|
|
|
|
|
'rthc' => 209,
|
|
|
|
|
'rthd' => 210,
|
|
|
|
|
'rthf' => 211,
|
|
|
|
|
'rthl' => 212,
|
|
|
|
|
'rthm' => 213,
|
|
|
|
|
'rthq' => 214,
|
|
|
|
|
'rthr' => 215,
|
|
|
|
|
'rths' => 216,
|
|
|
|
|
'rthw' => 217,
|
|
|
|
|
'rtsh' => 218,
|
|
|
|
|
'rtsm' => 219,
|
|
|
|
|
'rtsp' => 220,
|
|
|
|
|
'rtsw' => 221,
|
|
|
|
|
'schl' => 222,
|
|
|
|
|
'schm' => 223,
|
|
|
|
|
'schn' => 224,
|
|
|
|
|
'schw' => 225,
|
|
|
|
|
'scrp' => 226,
|
|
|
|
|
'sgmt' => 227,
|
|
|
|
|
'shcl' => 228,
|
|
|
|
|
'shkh' => 229,
|
|
|
|
|
'shpr' => 230,
|
|
|
|
|
'shpt' => 231,
|
|
|
|
|
'shst' => 232,
|
|
|
|
|
'shtr' => 233,
|
|
|
|
|
'shwh' => 234,
|
|
|
|
|
'smth' => 235,
|
|
|
|
|
'ssrs' => 236,
|
|
|
|
|
'ssst' => 237,
|
|
|
|
|
'sstd' => 238,
|
|
|
|
|
'sstr' => 239,
|
|
|
|
|
'stcr' => 240,
|
|
|
|
|
'sthm' => 241,
|
|
|
|
|
'stpl' => 242,
|
|
|
|
|
'stpr' => 243,
|
|
|
|
|
'stsc' => 244,
|
|
|
|
|
'stwr' => 245,
|
|
|
|
|
'tblt' => 246,
|
|
|
|
|
'tchb' => 247,
|
|
|
|
|
'tchc' => 248,
|
|
|
|
|
'tchd' => 249,
|
|
|
|
|
'tchf' => 250,
|
|
|
|
|
'tchl' => 251,
|
|
|
|
|
'tchm' => 252,
|
|
|
|
|
'tchp' => 253,
|
|
|
|
|
'tchw' => 254,
|
|
|
|
|
'thdr' => 255,
|
|
|
|
|
'thsh' => 256,
|
|
|
|
|
'thsk' => 257,
|
|
|
|
|
'thsp' => 258,
|
|
|
|
|
'thst' => 259,
|
|
|
|
|
'tsch' => 260,
|
|
|
|
|
'tspr' => 261,
|
|
|
|
|
'tstr' => 262,
|
|
|
|
|
'tthr' => 263,
|
|
|
|
|
'ttsb' => 264,
|
|
|
|
|
'tzkr' => 265,
|
|
|
|
|
'whsl' => 266,
|
|
|
|
|
'wnbr' => 267,
|
|
|
|
|
'wnpl' => 268,
|
|
|
|
|
'wnsf' => 269,
|
|
|
|
|
'wnsh' => 270,
|
|
|
|
|
'wnsm' => 271,
|
|
|
|
|
'wnsp' => 272,
|
|
|
|
|
'wnst' => 273,
|
|
|
|
|
'wnsw' => 274,
|
|
|
|
|
'wnth' => 275,
|
|
|
|
|
'wntr' => 276,
|
|
|
|
|
'wrnt' => 277,
|
|
|
|
|
'wsfl' => 278,
|
|
|
|
|
'wspr' => 279,
|
|
|
|
|
'wstr' => 280,
|
|
|
|
|
'xthl' => 281,
|
|
|
|
|
),
|
|
|
|
|
'tt' => array(
|
|
|
|
|
'блзд' => 0,
|
|
|
|
|
'бльд' => 1,
|
|
|
|
|
'брьс' => 2,
|
|
|
|
|
'бств' => 3,
|
|
|
|
|
'бстр' => 4,
|
|
|
|
|
'взбл' => 5,
|
|
|
|
|
'взбр' => 6,
|
|
|
|
|
'взгл' => 7,
|
|
|
|
|
'взгр' => 8,
|
|
|
|
|
'вздв' => 9,
|
|
|
|
|
'вздр' => 10,
|
|
|
|
|
'врвг' => 11,
|
|
|
|
|
'врск' => 12,
|
|
|
|
|
'вскл' => 13,
|
|
|
|
|
'вскр' => 14,
|
|
|
|
|
'вспл' => 15,
|
|
|
|
|
'вспр' => 16,
|
|
|
|
|
'вств' => 17,
|
|
|
|
|
'встр' => 18,
|
|
|
|
|
'всхл' => 19,
|
|
|
|
|
'всхр' => 20,
|
|
|
|
|
'втск' => 21,
|
|
|
|
|
'вхск' => 22,
|
|
|
|
|
'грск' => 23,
|
|
|
|
|
'гств' => 24,
|
|
|
|
|
'гтст' => 25,
|
|
|
|
|
'гшпр' => 26,
|
|
|
|
|
'двзд' => 27,
|
|
|
|
|
'джск' => 28,
|
|
|
|
|
'дрст' => 29,
|
|
|
|
|
'дскр' => 30,
|
|
|
|
|
'дств' => 31,
|
|
|
|
|
'дстр' => 32,
|
|
|
|
|
'дтск' => 33,
|
|
|
|
|
'жств' => 34,
|
|
|
|
|
'звзд' => 35,
|
|
|
|
|
'знст' => 36,
|
|
|
|
|
'зтьс' => 37,
|
|
|
|
|
'йздр' => 38,
|
|
|
|
|
'йкбр' => 39,
|
|
|
|
|
'йльн' => 40,
|
|
|
|
|
'йншт' => 41,
|
|
|
|
|
'йпфр' => 42,
|
|
|
|
|
'йств' => 43,
|
|
|
|
|
'йстр' => 44,
|
|
|
|
|
'йтск' => 45,
|
|
|
|
|
'йфст' => 46,
|
|
|
|
|
'йхсв' => 47,
|
|
|
|
|
'йхск' => 48,
|
|
|
|
|
'йхср' => 49,
|
|
|
|
|
'йхст' => 50,
|
|
|
|
|
'кскл' => 51,
|
|
|
|
|
'кскр' => 52,
|
|
|
|
|
'кспл' => 53,
|
|
|
|
|
'кспр' => 54,
|
|
|
|
|
'кств' => 55,
|
|
|
|
|
'кстн' => 56,
|
|
|
|
|
'кстр' => 57,
|
|
|
|
|
'лвст' => 58,
|
|
|
|
|
'лжск' => 59,
|
|
|
|
|
'лльн' => 60,
|
|
|
|
|
'лльс' => 61,
|
|
|
|
|
'лстр' => 62,
|
|
|
|
|
'лсть' => 63,
|
|
|
|
|
'льгв' => 64,
|
|
|
|
|
'льдж' => 65,
|
|
|
|
|
'льдк' => 66,
|
|
|
|
|
'льдм' => 67,
|
|
|
|
|
'льдс' => 68,
|
|
|
|
|
'льдф' => 69,
|
|
|
|
|
'льдц' => 70,
|
|
|
|
|
'льдш' => 71,
|
|
|
|
|
'льдъ' => 72,
|
|
|
|
|
'льдь' => 73,
|
|
|
|
|
'льзк' => 74,
|
|
|
|
|
'льзн' => 75,
|
|
|
|
|
'льзь' => 76,
|
|
|
|
|
'лькл' => 77,
|
|
|
|
|
'лькн' => 78,
|
|
|
|
|
'льпн' => 79,
|
|
|
|
|
'льпт' => 80,
|
|
|
|
|
'льск' => 81,
|
|
|
|
|
'льсн' => 82,
|
|
|
|
|
'льст' => 83,
|
|
|
|
|
'льтк' => 84,
|
|
|
|
|
'льтм' => 85,
|
|
|
|
|
'льтн' => 86,
|
|
|
|
|
'льтп' => 87,
|
|
|
|
|
'льтр' => 88,
|
|
|
|
|
'льтс' => 89,
|
|
|
|
|
'льтт' => 90,
|
|
|
|
|
'льтф' => 91,
|
|
|
|
|
'льфр' => 92,
|
|
|
|
|
'льцг' => 93,
|
|
|
|
|
'льчс' => 94,
|
|
|
|
|
'льшб' => 95,
|
|
|
|
|
'льшк' => 96,
|
|
|
|
|
'льшн' => 97,
|
|
|
|
|
'льшп' => 98,
|
|
|
|
|
'льшф' => 99,
|
|
|
|
|
'льшь' => 100,
|
|
|
|
|
'мбль' => 101,
|
|
|
|
|
'мбрс' => 102,
|
|
|
|
|
'мвзв' => 103,
|
|
|
|
|
'мздр' => 104,
|
|
|
|
|
'мств' => 105,
|
|
|
|
|
'мтск' => 106,
|
|
|
|
|
'нгль' => 107,
|
|
|
|
|
'нгст' => 108,
|
|
|
|
|
'ндгр' => 109,
|
|
|
|
|
'ндск' => 110,
|
|
|
|
|
'ндсп' => 111,
|
|
|
|
|
'ндшп' => 112,
|
|
|
|
|
'ндшт' => 113,
|
|
|
|
|
'нкск' => 114,
|
|
|
|
|
'нктн' => 115,
|
|
|
|
|
'нктс' => 116,
|
|
|
|
|
'нсгр' => 117,
|
|
|
|
|
'нскм' => 118,
|
|
|
|
|
'нскр' => 119,
|
|
|
|
|
'нспл' => 120,
|
|
|
|
|
'нств' => 121,
|
|
|
|
|
'нстк' => 122,
|
|
|
|
|
'нстр' => 123,
|
|
|
|
|
'нтгл' => 124,
|
|
|
|
|
'нтль' => 125,
|
|
|
|
|
'нтрб' => 126,
|
|
|
|
|
'нтрв' => 127,
|
|
|
|
|
'нтрг' => 128,
|
|
|
|
|
'нтрд' => 129,
|
|
|
|
|
'нтрм' => 130,
|
|
|
|
|
'нтрн' => 131,
|
|
|
|
|
'нтрп' => 132,
|
|
|
|
|
'нтрр' => 133,
|
|
|
|
|
'нтрф' => 134,
|
|
|
|
|
'нтск' => 135,
|
|
|
|
|
'нтст' => 136,
|
|
|
|
|
'нфск' => 137,
|
|
|
|
|
'нцкл' => 138,
|
|
|
|
|
'нцпл' => 139,
|
|
|
|
|
'нькн' => 140,
|
|
|
|
|
'ньск' => 141,
|
|
|
|
|
'ньчж' => 142,
|
|
|
|
|
'псск' => 143,
|
|
|
|
|
'пств' => 144,
|
|
|
|
|
'птск' => 145,
|
|
|
|
|
'рбск' => 146,
|
|
|
|
|
'ргпр' => 147,
|
|
|
|
|
'ргск' => 148,
|
|
|
|
|
'ргфл' => 149,
|
|
|
|
|
'рдск' => 150,
|
|
|
|
|
'рдсм' => 151,
|
|
|
|
|
'рдст' => 152,
|
|
|
|
|
'рздр' => 153,
|
|
|
|
|
'рзть' => 154,
|
|
|
|
|
'ркгр' => 155,
|
|
|
|
|
'ркск' => 156,
|
|
|
|
|
'рктн' => 157,
|
|
|
|
|
'рльс' => 158,
|
|
|
|
|
'рмск' => 159,
|
|
|
|
|
'рмтр' => 160,
|
|
|
|
|
'рнск' => 161,
|
|
|
|
|
'рпск' => 162,
|
|
|
|
|
'рсдр' => 163,
|
|
|
|
|
'рсск' => 164,
|
|
|
|
|
'рств' => 165,
|
|
|
|
|
'рстк' => 166,
|
|
|
|
|
'рстн' => 167,
|
|
|
|
|
'рстр' => 168,
|
|
|
|
|
'рстс' => 169,
|
|
|
|
|
'рсть' => 170,
|
|
|
|
|
'ртвл' => 171,
|
|
|
|
|
'ртвр' => 172,
|
|
|
|
|
'ртгр' => 173,
|
|
|
|
|
'рткр' => 174,
|
|
|
|
|
'ртпл' => 175,
|
|
|
|
|
'ртпр' => 176,
|
|
|
|
|
'ртск' => 177,
|
|
|
|
|
'ртсм' => 178,
|
|
|
|
|
'ртшк' => 179,
|
|
|
|
|
'ртьф' => 180,
|
|
|
|
|
'рхзв' => 181,
|
|
|
|
|
'рхпл' => 182,
|
|
|
|
|
'рхпр' => 183,
|
|
|
|
|
'рхсв' => 184,
|
|
|
|
|
'рхск' => 185,
|
|
|
|
|
'рхсм' => 186,
|
|
|
|
|
'рхср' => 187,
|
|
|
|
|
'рхтв' => 188,
|
|
|
|
|
'рхшт' => 189,
|
|
|
|
|
'рщвл' => 190,
|
|
|
|
|
'рьмл' => 191,
|
|
|
|
|
'скск' => 192,
|
|
|
|
|
'спрь' => 193,
|
|
|
|
|
'сспр' => 194,
|
|
|
|
|
'ссср' => 195,
|
|
|
|
|
'сств' => 196,
|
|
|
|
|
'сстр' => 197,
|
|
|
|
|
'ссшп' => 198,
|
|
|
|
|
'ствл' => 199,
|
|
|
|
|
'стрс' => 200,
|
|
|
|
|
'стрш' => 201,
|
|
|
|
|
'стск' => 202,
|
|
|
|
|
'стьб' => 203,
|
|
|
|
|
'стьд' => 204,
|
|
|
|
|
'стьс' => 205,
|
|
|
|
|
'ськн' => 206,
|
|
|
|
|
'сьмн' => 207,
|
|
|
|
|
'тмст' => 208,
|
|
|
|
|
'тпрр' => 209,
|
|
|
|
|
'трст' => 210,
|
|
|
|
|
'тскр' => 211,
|
|
|
|
|
'тств' => 212,
|
|
|
|
|
'тстр' => 213,
|
|
|
|
|
'ттль' => 214,
|
|
|
|
|
'ттск' => 215,
|
|
|
|
|
'тхск' => 216,
|
|
|
|
|
'фств' => 217,
|
|
|
|
|
'фстр' => 218,
|
|
|
|
|
'хств' => 219,
|
|
|
|
|
'хстр' => 220,
|
|
|
|
|
'хткл' => 221,
|
|
|
|
|
'хтск' => 222,
|
|
|
|
|
'хтсм' => 223,
|
|
|
|
|
'цстр' => 224,
|
|
|
|
|
),
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
#bigrams that do not occur at the beginning or end of words
|
|
|
|
|
private $bigrams = array(
|
|
|
|
|
#ru
|
|
|
|
|
' ёё' => 0,
|
|
|
|
|
' ёа' => 0,
|
|
|
|
|
' ёб' => 0,
|
|
|
|
|
' ёв' => 0,
|
|
|
|
|
' ёг' => 0,
|
|
|
|
|
' ёд' => 0,
|
|
|
|
|
' ёе' => 0,
|
|
|
|
|
' ёз' => 0,
|
|
|
|
|
' ёи' => 0,
|
|
|
|
|
' ёй' => 0,
|
|
|
|
|
' ён' => 0,
|
|
|
|
|
' ёо' => 0,
|
|
|
|
|
' ёп' => 0,
|
|
|
|
|
' ёс' => 0,
|
|
|
|
|
' ёт' => 0,
|
|
|
|
|
' ёу' => 0,
|
|
|
|
|
' ёф' => 0,
|
|
|
|
|
' ёц' => 0,
|
|
|
|
|
' ёч' => 0,
|
|
|
|
|
' ёщ' => 0,
|
|
|
|
|
' ёъ' => 0,
|
|
|
|
|
' ёы' => 0,
|
|
|
|
|
' ёь' => 0,
|
|
|
|
|
' ёэ' => 0,
|
|
|
|
|
' ёю' => 0,
|
|
|
|
|
' ёя' => 0,
|
|
|
|
|
' аё' => 0,
|
|
|
|
|
' аа' => 0,
|
|
|
|
|
' ае' => 0,
|
|
|
|
|
' ач' => 0,
|
|
|
|
|
' аъ' => 0,
|
|
|
|
|
' аы' => 0,
|
|
|
|
|
' аь' => 0,
|
|
|
|
|
' аю' => 0,
|
|
|
|
|
' ая' => 0,
|
|
|
|
|
' бб' => 0,
|
|
|
|
|
' бв' => 0,
|
|
|
|
|
' бг' => 0,
|
|
|
|
|
' бж' => 0,
|
|
|
|
|
' бй' => 0,
|
|
|
|
|
' бк' => 0,
|
|
|
|
|
' бм' => 0,
|
|
|
|
|
' бн' => 0,
|
|
|
|
|
' бп' => 0,
|
|
|
|
|
' бс' => 0,
|
|
|
|
|
' бт' => 0,
|
|
|
|
|
' бф' => 0,
|
|
|
|
|
' бх' => 0,
|
|
|
|
|
' бц' => 0,
|
|
|
|
|
' бч' => 0,
|
|
|
|
|
' бш' => 0,
|
|
|
|
|
' бщ' => 0,
|
|
|
|
|
' бъ' => 0,
|
|
|
|
|
' вй' => 0,
|
|
|
|
|
' вф' => 0,
|
|
|
|
|
' вщ' => 0,
|
|
|
|
|
' вэ' => 0,
|
|
|
|
|
' вю' => 0,
|
|
|
|
|
' гё' => 0,
|
|
|
|
|
' гб' => 0,
|
|
|
|
|
' гз' => 0,
|
|
|
|
|
' гй' => 0,
|
|
|
|
|
' гк' => 0,
|
|
|
|
|
' гп' => 0,
|
|
|
|
|
' гс' => 0,
|
|
|
|
|
' гт' => 0,
|
|
|
|
|
' гф' => 0,
|
|
|
|
|
' гх' => 0,
|
|
|
|
|
' гц' => 0,
|
|
|
|
|
' гч' => 0,
|
|
|
|
|
' гш' => 0,
|
|
|
|
|
' гщ' => 0,
|
|
|
|
|
' гъ' => 0,
|
|
|
|
|
' гь' => 0,
|
|
|
|
|
' гэ' => 0,
|
|
|
|
|
' дб' => 0,
|
|
|
|
|
' дг' => 0,
|
|
|
|
|
' дд' => 0,
|
|
|
|
|
' дй' => 0,
|
|
|
|
|
' дк' => 0,
|
|
|
|
|
' дп' => 0,
|
|
|
|
|
' дс' => 0,
|
|
|
|
|
' дт' => 0,
|
|
|
|
|
' дф' => 0,
|
|
|
|
|
' дх' => 0,
|
|
|
|
|
' дц' => 0,
|
|
|
|
|
' дч' => 0,
|
|
|
|
|
' дш' => 0,
|
|
|
|
|
' дъ' => 0,
|
|
|
|
|
' дэ' => 0,
|
|
|
|
|
' еа' => 0,
|
|
|
|
|
' еб' => 0,
|
|
|
|
|
' еи' => 0,
|
|
|
|
|
' ео' => 0,
|
|
|
|
|
' ет' => 0,
|
|
|
|
|
' еу' => 0,
|
|
|
|
|
' ец' => 0,
|
|
|
|
|
#' еш' => 0,
|
|
|
|
|
' еъ' => 0,
|
|
|
|
|
' еы' => 0,
|
|
|
|
|
' еь' => 0,
|
|
|
|
|
' еэ' => 0,
|
|
|
|
|
' ея' => 0,
|
|
|
|
|
' жз' => 0,
|
|
|
|
|
' жй' => 0,
|
|
|
|
|
' жк' => 0,
|
|
|
|
|
' жл' => 0,
|
|
|
|
|
' жп' => 0,
|
|
|
|
|
' жс' => 0,
|
|
|
|
|
' жт' => 0,
|
|
|
|
|
' жф' => 0,
|
|
|
|
|
' жх' => 0,
|
|
|
|
|
' жц' => 0,
|
|
|
|
|
' жч' => 0,
|
|
|
|
|
' жш' => 0,
|
|
|
|
|
' жщ' => 0,
|
|
|
|
|
' жъ' => 0,
|
|
|
|
|
' жы' => 0,
|
|
|
|
|
' жь' => 0,
|
|
|
|
|
' жэ' => 0,
|
|
|
|
|
#' жю' => 0,
|
|
|
|
|
' жя' => 0,
|
|
|
|
|
' зб' => 0,
|
|
|
|
|
' зж' => 0,
|
|
|
|
|
' зз' => 0,
|
|
|
|
|
' зй' => 0,
|
|
|
|
|
' зк' => 0,
|
|
|
|
|
' зп' => 0,
|
|
|
|
|
' зс' => 0,
|
|
|
|
|
' зт' => 0,
|
|
|
|
|
' зф' => 0,
|
|
|
|
|
' зх' => 0,
|
|
|
|
|
' зц' => 0,
|
|
|
|
|
' зч' => 0,
|
|
|
|
|
' зш' => 0,
|
|
|
|
|
' зщ' => 0,
|
|
|
|
|
' зъ' => 0,
|
|
|
|
|
' зь' => 0,
|
|
|
|
|
' зэ' => 0,
|
|
|
|
|
' иё' => 0,
|
|
|
|
|
' иа' => 0,
|
|
|
|
|
' иф' => 0,
|
|
|
|
|
' иц' => 0,
|
|
|
|
|
' иъ' => 0,
|
|
|
|
|
' иы' => 0,
|
|
|
|
|
' иь' => 0,
|
|
|
|
|
' иэ' => 0,
|
|
|
|
|
' ия' => 0,
|
|
|
|
|
' йё' => 0,
|
|
|
|
|
' йа' => 0,
|
|
|
|
|
' йб' => 0,
|
|
|
|
|
' йв' => 0,
|
|
|
|
|
' йг' => 0,
|
|
|
|
|
' йд' => 0,
|
|
|
|
|
' йж' => 0,
|
|
|
|
|
' йз' => 0,
|
|
|
|
|
' йи' => 0,
|
|
|
|
|
' йй' => 0,
|
|
|
|
|
' йк' => 0,
|
|
|
|
|
' йл' => 0,
|
|
|
|
|
' йм' => 0,
|
|
|
|
|
' йн' => 0,
|
|
|
|
|
' йп' => 0,
|
|
|
|
|
' йр' => 0,
|
|
|
|
|
' йс' => 0,
|
|
|
|
|
' йт' => 0,
|
|
|
|
|
' йу' => 0,
|
|
|
|
|
' йф' => 0,
|
|
|
|
|
' йх' => 0,
|
|
|
|
|
' йц' => 0,
|
|
|
|
|
' йч' => 0,
|
|
|
|
|
' йш' => 0,
|
|
|
|
|
' йщ' => 0,
|
|
|
|
|
' йъ' => 0,
|
|
|
|
|
' йы' => 0,
|
|
|
|
|
' йь' => 0,
|
|
|
|
|
' йэ' => 0,
|
|
|
|
|
' йю' => 0,
|
|
|
|
|
' йя' => 0,
|
|
|
|
|
' кё' => 0,
|
|
|
|
|
' кб' => 0,
|
|
|
|
|
' кд' => 0,
|
|
|
|
|
' кж' => 0,
|
|
|
|
|
' кй' => 0,
|
|
|
|
|
' кк' => 0,
|
|
|
|
|
' кф' => 0,
|
|
|
|
|
' кц' => 0,
|
|
|
|
|
' кч' => 0,
|
|
|
|
|
' кщ' => 0,
|
|
|
|
|
' къ' => 0,
|
|
|
|
|
' кя' => 0,
|
|
|
|
|
' лв' => 0,
|
|
|
|
|
' лд' => 0,
|
|
|
|
|
' лз' => 0,
|
|
|
|
|
' лй' => 0,
|
|
|
|
|
' лк' => 0,
|
|
|
|
|
' лл' => 0,
|
|
|
|
|
' лм' => 0,
|
|
|
|
|
' лн' => 0,
|
|
|
|
|
' лп' => 0,
|
|
|
|
|
' лр' => 0,
|
|
|
|
|
' лс' => 0,
|
|
|
|
|
' лт' => 0,
|
|
|
|
|
' лф' => 0,
|
|
|
|
|
' лх' => 0,
|
|
|
|
|
' лц' => 0,
|
|
|
|
|
' лч' => 0,
|
|
|
|
|
' лш' => 0,
|
|
|
|
|
' лщ' => 0,
|
|
|
|
|
' лъ' => 0,
|
|
|
|
|
' лэ' => 0,
|
|
|
|
|
' мб' => 0,
|
|
|
|
|
' мв' => 0,
|
|
|
|
|
' мд' => 0,
|
|
|
|
|
' мж' => 0,
|
|
|
|
|
' мй' => 0,
|
|
|
|
|
' мк' => 0,
|
|
|
|
|
' мп' => 0,
|
|
|
|
|
' мт' => 0,
|
|
|
|
|
' мф' => 0,
|
|
|
|
|
' мц' => 0,
|
|
|
|
|
' мъ' => 0,
|
|
|
|
|
' мь' => 0,
|
|
|
|
|
' нб' => 0,
|
|
|
|
|
' нв' => 0,
|
|
|
|
|
' нг' => 0,
|
|
|
|
|
' нд' => 0,
|
|
|
|
|
' нж' => 0,
|
|
|
|
|
' нз' => 0,
|
|
|
|
|
' нй' => 0,
|
|
|
|
|
' нк' => 0,
|
|
|
|
|
' нл' => 0,
|
|
|
|
|
' нм' => 0,
|
|
|
|
|
' нн' => 0,
|
|
|
|
|
' нп' => 0,
|
|
|
|
|
' нс' => 0,
|
|
|
|
|
' нт' => 0,
|
|
|
|
|
' нф' => 0,
|
|
|
|
|
' нх' => 0,
|
|
|
|
|
' нц' => 0,
|
|
|
|
|
' нч' => 0,
|
|
|
|
|
' нш' => 0,
|
|
|
|
|
' нщ' => 0,
|
|
|
|
|
' нъ' => 0,
|
|
|
|
|
' оё' => 0,
|
|
|
|
|
' ои' => 0,
|
|
|
|
|
' оу' => 0,
|
|
|
|
|
' оъ' => 0,
|
|
|
|
|
' оы' => 0,
|
|
|
|
|
' оь' => 0,
|
|
|
|
|
' оэ' => 0,
|
|
|
|
|
' оя' => 0,
|
|
|
|
|
' пб' => 0,
|
|
|
|
|
' пв' => 0,
|
|
|
|
|
' пг' => 0,
|
|
|
|
|
' пд' => 0,
|
|
|
|
|
' пж' => 0,
|
|
|
|
|
' пз' => 0,
|
|
|
|
|
' пй' => 0,
|
|
|
|
|
' пк' => 0,
|
|
|
|
|
' пм' => 0,
|
|
|
|
|
' пп' => 0,
|
|
|
|
|
' пц' => 0,
|
|
|
|
|
' пщ' => 0,
|
|
|
|
|
' пъ' => 0,
|
|
|
|
|
' рб' => 0,
|
|
|
|
|
' рг' => 0,
|
|
|
|
|
' рз' => 0,
|
|
|
|
|
' рй' => 0,
|
|
|
|
|
' рк' => 0,
|
|
|
|
|
' рл' => 0,
|
|
|
|
|
' рм' => 0,
|
|
|
|
|
' рн' => 0,
|
|
|
|
|
' рп' => 0,
|
|
|
|
|
' рр' => 0,
|
|
|
|
|
' рф' => 0,
|
|
|
|
|
' рх' => 0,
|
|
|
|
|
' рч' => 0,
|
|
|
|
|
' рш' => 0,
|
|
|
|
|
' рщ' => 0,
|
|
|
|
|
' ръ' => 0,
|
|
|
|
|
' сй' => 0,
|
|
|
|
|
' сщ' => 0,
|
|
|
|
|
' тб' => 0,
|
|
|
|
|
' тг' => 0,
|
|
|
|
|
' тд' => 0,
|
|
|
|
|
' тж' => 0,
|
|
|
|
|
' тз' => 0,
|
|
|
|
|
' тй' => 0,
|
|
|
|
|
' тн' => 0,
|
|
|
|
|
' тт' => 0,
|
|
|
|
|
' тх' => 0,
|
|
|
|
|
' тц' => 0,
|
|
|
|
|
' тч' => 0,
|
|
|
|
|
' тш' => 0,
|
|
|
|
|
' тъ' => 0,
|
|
|
|
|
' уу' => 0,
|
|
|
|
|
' уъ' => 0,
|
|
|
|
|
' уы' => 0,
|
|
|
|
|
' уь' => 0,
|
|
|
|
|
' фб' => 0,
|
|
|
|
|
' фв' => 0,
|
|
|
|
|
' фг' => 0,
|
|
|
|
|
' фд' => 0,
|
|
|
|
|
' фж' => 0,
|
|
|
|
|
' фз' => 0,
|
|
|
|
|
' фй' => 0,
|
|
|
|
|
' фк' => 0,
|
|
|
|
|
' фм' => 0,
|
|
|
|
|
' фн' => 0,
|
|
|
|
|
' фп' => 0,
|
|
|
|
|
' фс' => 0,
|
|
|
|
|
' фх' => 0,
|
|
|
|
|
' фц' => 0,
|
|
|
|
|
' фч' => 0,
|
|
|
|
|
' фш' => 0,
|
|
|
|
|
' фщ' => 0,
|
|
|
|
|
' фъ' => 0,
|
|
|
|
|
' фэ' => 0,
|
|
|
|
|
' фя' => 0,
|
|
|
|
|
' хё' => 0,
|
|
|
|
|
' хб' => 0,
|
|
|
|
|
' хг' => 0,
|
|
|
|
|
' хд' => 0,
|
|
|
|
|
' хж' => 0,
|
|
|
|
|
' хз' => 0,
|
|
|
|
|
' хй' => 0,
|
|
|
|
|
' хк' => 0,
|
|
|
|
|
' хп' => 0,
|
|
|
|
|
' хс' => 0,
|
|
|
|
|
' хт' => 0,
|
|
|
|
|
' хф' => 0,
|
|
|
|
|
' хц' => 0,
|
|
|
|
|
' хч' => 0,
|
|
|
|
|
' хш' => 0,
|
|
|
|
|
' хщ' => 0,
|
|
|
|
|
' хъ' => 0,
|
|
|
|
|
' хы' => 0,
|
|
|
|
|
' хь' => 0,
|
|
|
|
|
#' хэ' => 0,
|
|
|
|
|
' хю' => 0,
|
|
|
|
|
' хя' => 0,
|
|
|
|
|
' цё' => 0,
|
|
|
|
|
' цб' => 0,
|
|
|
|
|
' цг' => 0,
|
|
|
|
|
' цд' => 0,
|
|
|
|
|
' цж' => 0,
|
|
|
|
|
' цй' => 0,
|
|
|
|
|
' цл' => 0,
|
|
|
|
|
' цм' => 0,
|
|
|
|
|
' цн' => 0,
|
|
|
|
|
' цп' => 0,
|
|
|
|
|
' цр' => 0,
|
|
|
|
|
' цс' => 0,
|
|
|
|
|
' цт' => 0,
|
|
|
|
|
' цф' => 0,
|
|
|
|
|
' цх' => 0,
|
|
|
|
|
' цц' => 0,
|
|
|
|
|
' цч' => 0,
|
|
|
|
|
' цш' => 0,
|
|
|
|
|
' цщ' => 0,
|
|
|
|
|
' цъ' => 0,
|
|
|
|
|
' ць' => 0,
|
|
|
|
|
' цэ' => 0,
|
|
|
|
|
' цю' => 0,
|
|
|
|
|
' ця' => 0,
|
|
|
|
|
' чб' => 0,
|
|
|
|
|
' чг' => 0,
|
|
|
|
|
' чд' => 0,
|
|
|
|
|
' чж' => 0,
|
|
|
|
|
' чз' => 0,
|
|
|
|
|
' чй' => 0,
|
|
|
|
|
' чн' => 0,
|
|
|
|
|
' чп' => 0,
|
|
|
|
|
' чс' => 0,
|
|
|
|
|
' чф' => 0,
|
|
|
|
|
' чц' => 0,
|
|
|
|
|
' чч' => 0,
|
|
|
|
|
' чщ' => 0,
|
|
|
|
|
' чъ' => 0,
|
|
|
|
|
' чы' => 0,
|
|
|
|
|
' чэ' => 0,
|
|
|
|
|
' чю' => 0,
|
|
|
|
|
' чя' => 0,
|
|
|
|
|
' шб' => 0,
|
|
|
|
|
' шг' => 0,
|
|
|
|
|
' шд' => 0,
|
|
|
|
|
' шж' => 0,
|
|
|
|
|
' шз' => 0,
|
|
|
|
|
' шй' => 0,
|
|
|
|
|
' шс' => 0,
|
|
|
|
|
' шф' => 0,
|
|
|
|
|
' шц' => 0,
|
|
|
|
|
' шч' => 0,
|
|
|
|
|
' шщ' => 0,
|
|
|
|
|
' шъ' => 0,
|
|
|
|
|
' шы' => 0,
|
|
|
|
|
' шэ' => 0,
|
|
|
|
|
' шю' => 0,
|
|
|
|
|
' шя' => 0,
|
|
|
|
|
' щб' => 0,
|
|
|
|
|
' щв' => 0,
|
|
|
|
|
' щг' => 0,
|
|
|
|
|
' щд' => 0,
|
|
|
|
|
' щж' => 0,
|
|
|
|
|
' щз' => 0,
|
|
|
|
|
' щй' => 0,
|
|
|
|
|
' щк' => 0,
|
|
|
|
|
' щл' => 0,
|
|
|
|
|
' щм' => 0,
|
|
|
|
|
' щн' => 0,
|
|
|
|
|
' що' => 0,
|
|
|
|
|
' щп' => 0,
|
|
|
|
|
' щр' => 0,
|
|
|
|
|
' щс' => 0,
|
|
|
|
|
' щт' => 0,
|
|
|
|
|
' щф' => 0,
|
|
|
|
|
' щх' => 0,
|
|
|
|
|
' щц' => 0,
|
|
|
|
|
' щч' => 0,
|
|
|
|
|
' щш' => 0,
|
|
|
|
|
' щщ' => 0,
|
|
|
|
|
' щъ' => 0,
|
|
|
|
|
' щы' => 0,
|
|
|
|
|
' щь' => 0,
|
|
|
|
|
' щэ' => 0,
|
|
|
|
|
' щю' => 0,
|
|
|
|
|
' щя' => 0,
|
|
|
|
|
' ъё' => 0,
|
|
|
|
|
' ъа' => 0,
|
|
|
|
|
' ъб' => 0,
|
|
|
|
|
' ъв' => 0,
|
|
|
|
|
' ъг' => 0,
|
|
|
|
|
' ъд' => 0,
|
|
|
|
|
' ъе' => 0,
|
|
|
|
|
' ъж' => 0,
|
|
|
|
|
' ъз' => 0,
|
|
|
|
|
' ъи' => 0,
|
|
|
|
|
' ъй' => 0,
|
|
|
|
|
' ък' => 0,
|
|
|
|
|
' ъл' => 0,
|
|
|
|
|
' ъм' => 0,
|
|
|
|
|
' ън' => 0,
|
|
|
|
|
' ъо' => 0,
|
|
|
|
|
' ъп' => 0,
|
|
|
|
|
' ър' => 0,
|
|
|
|
|
' ъс' => 0,
|
|
|
|
|
' ът' => 0,
|
|
|
|
|
' ъу' => 0,
|
|
|
|
|
' ъф' => 0,
|
|
|
|
|
' ъх' => 0,
|
|
|
|
|
' ъц' => 0,
|
|
|
|
|
' ъч' => 0,
|
|
|
|
|
' ъш' => 0,
|
|
|
|
|
' ъщ' => 0,
|
|
|
|
|
' ъъ' => 0,
|
|
|
|
|
' ъы' => 0,
|
|
|
|
|
' ъь' => 0,
|
|
|
|
|
' ъэ' => 0,
|
|
|
|
|
' ъю' => 0,
|
|
|
|
|
' ъя' => 0,
|
|
|
|
|
' ыё' => 0,
|
|
|
|
|
' ыа' => 0,
|
|
|
|
|
' ыб' => 0,
|
|
|
|
|
' ыв' => 0,
|
|
|
|
|
' ыг' => 0,
|
|
|
|
|
' ыд' => 0,
|
|
|
|
|
' ые' => 0,
|
|
|
|
|
' ыж' => 0,
|
|
|
|
|
' ыз' => 0,
|
|
|
|
|
' ыи' => 0,
|
|
|
|
|
' ый' => 0,
|
|
|
|
|
' ык' => 0,
|
|
|
|
|
' ыл' => 0,
|
|
|
|
|
' ын' => 0,
|
|
|
|
|
' ыо' => 0,
|
|
|
|
|
' ып' => 0,
|
|
|
|
|
' ыр' => 0,
|
|
|
|
|
' ыс' => 0,
|
|
|
|
|
' ыт' => 0,
|
|
|
|
|
' ыу' => 0,
|
|
|
|
|
' ыф' => 0,
|
|
|
|
|
' ых' => 0,
|
|
|
|
|
' ыц' => 0,
|
|
|
|
|
' ыч' => 0,
|
|
|
|
|
' ыш' => 0,
|
|
|
|
|
' ыщ' => 0,
|
|
|
|
|
' ыъ' => 0,
|
|
|
|
|
' ыы' => 0,
|
|
|
|
|
' ыь' => 0,
|
|
|
|
|
' ыэ' => 0,
|
|
|
|
|
' ыю' => 0,
|
|
|
|
|
' ыя' => 0,
|
|
|
|
|
' ьё' => 0,
|
|
|
|
|
' ьа' => 0,
|
|
|
|
|
' ьб' => 0,
|
|
|
|
|
' ьв' => 0,
|
|
|
|
|
' ьг' => 0,
|
|
|
|
|
' ьд' => 0,
|
|
|
|
|
' ье' => 0,
|
|
|
|
|
' ьж' => 0,
|
|
|
|
|
' ьз' => 0,
|
|
|
|
|
' ьи' => 0,
|
|
|
|
|
' ьй' => 0,
|
|
|
|
|
' ьк' => 0,
|
|
|
|
|
' ьл' => 0,
|
|
|
|
|
' ьм' => 0,
|
|
|
|
|
' ьн' => 0,
|
|
|
|
|
' ьо' => 0,
|
|
|
|
|
' ьп' => 0,
|
|
|
|
|
' ьр' => 0,
|
|
|
|
|
' ьс' => 0,
|
|
|
|
|
' ьт' => 0,
|
|
|
|
|
' ьу' => 0,
|
|
|
|
|
' ьф' => 0,
|
|
|
|
|
' ьх' => 0,
|
|
|
|
|
' ьц' => 0,
|
|
|
|
|
' ьч' => 0,
|
|
|
|
|
' ьш' => 0,
|
|
|
|
|
' ьщ' => 0,
|
|
|
|
|
' ьъ' => 0,
|
|
|
|
|
' ьы' => 0,
|
|
|
|
|
' ьь' => 0,
|
|
|
|
|
' ьэ' => 0,
|
|
|
|
|
' ью' => 0,
|
|
|
|
|
' ья' => 0,
|
|
|
|
|
' эё' => 0,
|
|
|
|
|
' эа' => 0,
|
|
|
|
|
' эе' => 0,
|
|
|
|
|
' эи' => 0,
|
|
|
|
|
' эц' => 0,
|
|
|
|
|
' эч' => 0,
|
|
|
|
|
' эщ' => 0,
|
|
|
|
|
' эъ' => 0,
|
|
|
|
|
' эы' => 0,
|
|
|
|
|
' эь' => 0,
|
|
|
|
|
' ээ' => 0,
|
|
|
|
|
' эю' => 0,
|
|
|
|
|
' юё' => 0,
|
|
|
|
|
' юе' => 0,
|
|
|
|
|
' юи' => 0,
|
|
|
|
|
' юй' => 0,
|
|
|
|
|
' юо' => 0,
|
|
|
|
|
' юу' => 0,
|
|
|
|
|
' юц' => 0,
|
|
|
|
|
' юш' => 0,
|
|
|
|
|
' ющ' => 0,
|
|
|
|
|
' юъ' => 0,
|
|
|
|
|
' юы' => 0,
|
|
|
|
|
' юь' => 0,
|
|
|
|
|
' юэ' => 0,
|
|
|
|
|
' юя' => 0,
|
|
|
|
|
' яё' => 0,
|
|
|
|
|
' яа' => 0,
|
|
|
|
|
' яе' => 0,
|
|
|
|
|
' яж' => 0,
|
|
|
|
|
' яо' => 0,
|
|
|
|
|
' яу' => 0,
|
|
|
|
|
' яф' => 0,
|
|
|
|
|
' яц' => 0,
|
|
|
|
|
' яъ' => 0,
|
|
|
|
|
' яы' => 0,
|
|
|
|
|
' яь' => 0,
|
|
|
|
|
' яэ' => 0,
|
|
|
|
|
' яю' => 0,
|
|
|
|
|
' яя' => 0,
|
|
|
|
|
'ёё' => 0,
|
|
|
|
|
'ёё ' => 0,
|
|
|
|
|
'ёа' => 0,
|
|
|
|
|
'ёа ' => 0,
|
|
|
|
|
'ёг ' => 0,
|
|
|
|
|
'ёе' => 0,
|
|
|
|
|
'ёе ' => 0,
|
|
|
|
|
'ёи' => 0,
|
|
|
|
|
'ёи ' => 0,
|
|
|
|
|
'ёй' => 0,
|
|
|
|
|
'ёо' => 0,
|
|
|
|
|
'ёо ' => 0,
|
|
|
|
|
'ёу' => 0,
|
|
|
|
|
'ёу ' => 0,
|
|
|
|
|
'ёф' => 0,
|
|
|
|
|
'ёф ' => 0,
|
|
|
|
|
'ёц ' => 0,
|
|
|
|
|
'ёч ' => 0,
|
|
|
|
|
'ёщ ' => 0,
|
|
|
|
|
'ёъ' => 0,
|
|
|
|
|
'ёъ ' => 0,
|
|
|
|
|
'ёы' => 0,
|
|
|
|
|
'ёы ' => 0,
|
|
|
|
|
'ёь' => 0,
|
|
|
|
|
'ёь ' => 0,
|
|
|
|
|
'ёэ' => 0,
|
|
|
|
|
'ёэ ' => 0,
|
|
|
|
|
'ёю' => 0,
|
|
|
|
|
'ёя' => 0,
|
|
|
|
|
'ёя ' => 0,
|
|
|
|
|
'аё ' => 0,
|
|
|
|
|
'аа ' => 0,
|
|
|
|
|
'аъ' => 0,
|
|
|
|
|
'аъ ' => 0,
|
|
|
|
|
'аы' => 0,
|
|
|
|
|
'аы ' => 0,
|
|
|
|
|
'аь' => 0,
|
|
|
|
|
'аь ' => 0,
|
|
|
|
|
'аэ ' => 0,
|
|
|
|
|
'бё ' => 0,
|
|
|
|
|
'бб ' => 0,
|
|
|
|
|
'бв ' => 0,
|
|
|
|
|
'бг ' => 0,
|
|
|
|
|
'бд ' => 0,
|
|
|
|
|
'бж ' => 0,
|
|
|
|
|
'бз ' => 0,
|
|
|
|
|
'бй' => 0,
|
|
|
|
|
'бй ' => 0,
|
|
|
|
|
'бк ' => 0,
|
|
|
|
|
'бм ' => 0,
|
|
|
|
|
'бн ' => 0,
|
|
|
|
|
'бп ' => 0,
|
|
|
|
|
'бт ' => 0,
|
|
|
|
|
'бф ' => 0,
|
|
|
|
|
'бх ' => 0,
|
|
|
|
|
'бц ' => 0,
|
|
|
|
|
'бч ' => 0,
|
|
|
|
|
'бш ' => 0,
|
|
|
|
|
'бщ ' => 0,
|
|
|
|
|
'бъ ' => 0,
|
|
|
|
|
'бэ ' => 0,
|
|
|
|
|
'вё ' => 0,
|
|
|
|
|
'вб ' => 0,
|
|
|
|
|
'вв ' => 0,
|
|
|
|
|
'вд ' => 0,
|
|
|
|
|
'вж' => 0,
|
|
|
|
|
'вж ' => 0,
|
|
|
|
|
'вз ' => 0,
|
|
|
|
|
'вй' => 0,
|
|
|
|
|
'вй ' => 0,
|
|
|
|
|
'вл ' => 0,
|
|
|
|
|
'вп ' => 0,
|
|
|
|
|
'вф ' => 0,
|
|
|
|
|
'вц ' => 0,
|
|
|
|
|
'вч ' => 0,
|
|
|
|
|
'вщ ' => 0,
|
|
|
|
|
'въ' => 0,
|
|
|
|
|
'въ ' => 0,
|
|
|
|
|
'вэ ' => 0,
|
|
|
|
|
'гё' => 0,
|
|
|
|
|
'гё ' => 0,
|
|
|
|
|
'гб ' => 0,
|
|
|
|
|
'гг ' => 0,
|
|
|
|
|
'гж' => 0,
|
|
|
|
|
'гж ' => 0,
|
|
|
|
|
'гз ' => 0,
|
|
|
|
|
'гй' => 0,
|
|
|
|
|
'гй ' => 0,
|
|
|
|
|
'гк ' => 0,
|
|
|
|
|
'гн ' => 0,
|
|
|
|
|
'гп ' => 0,
|
|
|
|
|
'гф ' => 0,
|
|
|
|
|
'гх' => 0,
|
|
|
|
|
'гх ' => 0,
|
|
|
|
|
'гц' => 0,
|
|
|
|
|
'гц ' => 0,
|
|
|
|
|
'гч ' => 0,
|
|
|
|
|
'гш ' => 0,
|
|
|
|
|
'гщ ' => 0,
|
|
|
|
|
'гъ' => 0,
|
|
|
|
|
'гъ ' => 0,
|
|
|
|
|
'гы ' => 0,
|
|
|
|
|
'гь' => 0,
|
|
|
|
|
'гь ' => 0,
|
|
|
|
|
'гэ ' => 0,
|
|
|
|
|
'гю' => 0,
|
|
|
|
|
'гю ' => 0,
|
|
|
|
|
'гя' => 0,
|
|
|
|
|
'гя ' => 0,
|
|
|
|
|
'дё ' => 0,
|
|
|
|
|
'дб ' => 0,
|
|
|
|
|
'дг ' => 0,
|
|
|
|
|
'дд ' => 0,
|
|
|
|
|
'дй' => 0,
|
|
|
|
|
'дй ' => 0,
|
|
|
|
|
'дк ' => 0,
|
|
|
|
|
'дм ' => 0,
|
|
|
|
|
'дн ' => 0,
|
|
|
|
|
'дп ' => 0,
|
|
|
|
|
'дс ' => 0,
|
|
|
|
|
'дф ' => 0,
|
|
|
|
|
'дх ' => 0,
|
|
|
|
|
'дц ' => 0,
|
|
|
|
|
'дч ' => 0,
|
|
|
|
|
'дш ' => 0,
|
|
|
|
|
'дщ ' => 0,
|
|
|
|
|
'дъ ' => 0,
|
|
|
|
|
'еа ' => 0,
|
|
|
|
|
'еу ' => 0,
|
|
|
|
|
'еъ' => 0,
|
|
|
|
|
'еъ ' => 0,
|
|
|
|
|
'еы' => 0,
|
|
|
|
|
'еы ' => 0,
|
|
|
|
|
'еь' => 0,
|
|
|
|
|
'еь ' => 0,
|
|
|
|
|
'еэ ' => 0,
|
|
|
|
|
'жё ' => 0,
|
|
|
|
|
'жв ' => 0,
|
|
|
|
|
'жг ' => 0,
|
|
|
|
|
'жж ' => 0,
|
|
|
|
|
'жз ' => 0,
|
|
|
|
|
'жй' => 0,
|
|
|
|
|
'жй ' => 0,
|
|
|
|
|
'жк ' => 0,
|
|
|
|
|
'жл ' => 0,
|
|
|
|
|
'жн ' => 0,
|
|
|
|
|
'жп ' => 0,
|
|
|
|
|
'жр ' => 0,
|
|
|
|
|
'жс ' => 0,
|
|
|
|
|
'жт ' => 0,
|
|
|
|
|
'жф ' => 0,
|
|
|
|
|
'жх ' => 0,
|
|
|
|
|
'жц ' => 0,
|
|
|
|
|
'жч ' => 0,
|
|
|
|
|
'жш' => 0,
|
|
|
|
|
'жш ' => 0,
|
|
|
|
|
'жщ' => 0,
|
|
|
|
|
'жщ ' => 0,
|
|
|
|
|
'жъ' => 0,
|
|
|
|
|
'жъ ' => 0,
|
|
|
|
|
'жы ' => 0,
|
|
|
|
|
'жэ ' => 0,
|
|
|
|
|
'жю' => 0,
|
|
|
|
|
'жю ' => 0,
|
|
|
|
|
'жя' => 0,
|
|
|
|
|
'жя ' => 0,
|
|
|
|
|
'зё ' => 0,
|
|
|
|
|
'зж ' => 0,
|
|
|
|
|
'зз ' => 0,
|
|
|
|
|
'зй' => 0,
|
|
|
|
|
'зй ' => 0,
|
|
|
|
|
'зк ' => 0,
|
|
|
|
|
'зп ' => 0,
|
|
|
|
|
'зр ' => 0,
|
|
|
|
|
'зс ' => 0,
|
|
|
|
|
'зт ' => 0,
|
|
|
|
|
'зф' => 0,
|
|
|
|
|
'зф ' => 0,
|
|
|
|
|
'зх' => 0,
|
|
|
|
|
'зх ' => 0,
|
|
|
|
|
'зц ' => 0,
|
|
|
|
|
'зч ' => 0,
|
|
|
|
|
'зш ' => 0,
|
|
|
|
|
'зщ' => 0,
|
|
|
|
|
'зщ ' => 0,
|
|
|
|
|
'зъ ' => 0,
|
|
|
|
|
'зэ ' => 0,
|
|
|
|
|
'иъ' => 0,
|
|
|
|
|
'иъ ' => 0,
|
|
|
|
|
'иы' => 0,
|
|
|
|
|
'иы ' => 0,
|
|
|
|
|
'иь' => 0,
|
|
|
|
|
'иь ' => 0,
|
|
|
|
|
'иэ ' => 0,
|
|
|
|
|
'йё' => 0,
|
|
|
|
|
'йё ' => 0,
|
|
|
|
|
'йа ' => 0,
|
|
|
|
|
'йв ' => 0,
|
|
|
|
|
'йг ' => 0,
|
|
|
|
|
'йж' => 0,
|
|
|
|
|
'йж ' => 0,
|
|
|
|
|
'йз ' => 0,
|
|
|
|
|
'йи ' => 0,
|
|
|
|
|
'йй' => 0,
|
|
|
|
|
'йй ' => 0,
|
|
|
|
|
'йо ' => 0,
|
|
|
|
|
'йу' => 0,
|
|
|
|
|
'йу ' => 0,
|
|
|
|
|
'йч ' => 0,
|
|
|
|
|
'йш ' => 0,
|
|
|
|
|
'йщ ' => 0,
|
|
|
|
|
'йъ' => 0,
|
|
|
|
|
'йъ ' => 0,
|
|
|
|
|
'йы' => 0,
|
|
|
|
|
'йы ' => 0,
|
|
|
|
|
'йь' => 0,
|
|
|
|
|
'йь ' => 0,
|
|
|
|
|
'йэ' => 0,
|
|
|
|
|
'йэ ' => 0,
|
|
|
|
|
'йю' => 0,
|
|
|
|
|
'йю ' => 0,
|
|
|
|
|
'йя' => 0,
|
|
|
|
|
'кё ' => 0,
|
|
|
|
|
'кб ' => 0,
|
|
|
|
|
'кг ' => 0,
|
|
|
|
|
'кд ' => 0,
|
|
|
|
|
'кж ' => 0,
|
|
|
|
|
'кз ' => 0,
|
|
|
|
|
'кй' => 0,
|
|
|
|
|
'кй ' => 0,
|
|
|
|
|
'км ' => 0,
|
|
|
|
|
'кн ' => 0,
|
|
|
|
|
'кф ' => 0,
|
|
|
|
|
'кц ' => 0,
|
|
|
|
|
'кч ' => 0,
|
|
|
|
|
'кш ' => 0,
|
|
|
|
|
'кщ' => 0,
|
|
|
|
|
'кщ ' => 0,
|
|
|
|
|
'къ' => 0,
|
|
|
|
|
'къ ' => 0,
|
|
|
|
|
'кы ' => 0,
|
|
|
|
|
'кь ' => 0,
|
|
|
|
|
'кэ' => 0,
|
|
|
|
|
'кэ ' => 0,
|
|
|
|
|
'кя' => 0,
|
|
|
|
|
'кя ' => 0,
|
|
|
|
|
'лв ' => 0,
|
|
|
|
|
'лж ' => 0,
|
|
|
|
|
'лз ' => 0,
|
|
|
|
|
'лй' => 0,
|
|
|
|
|
'лй ' => 0,
|
|
|
|
|
'лр ' => 0,
|
|
|
|
|
'лф ' => 0,
|
|
|
|
|
'лх ' => 0,
|
|
|
|
|
'лц ' => 0,
|
|
|
|
|
'лч ' => 0,
|
|
|
|
|
'лш ' => 0,
|
|
|
|
|
'лщ ' => 0,
|
|
|
|
|
'лъ' => 0,
|
|
|
|
|
'лъ ' => 0,
|
|
|
|
|
'лэ' => 0,
|
|
|
|
|
'лэ ' => 0,
|
|
|
|
|
'мё ' => 0,
|
|
|
|
|
'мв ' => 0,
|
|
|
|
|
'мг ' => 0,
|
|
|
|
|
'мд ' => 0,
|
|
|
|
|
'мз ' => 0,
|
|
|
|
|
'мй' => 0,
|
|
|
|
|
'мк ' => 0,
|
|
|
|
|
'мл ' => 0,
|
|
|
|
|
'мр ' => 0,
|
|
|
|
|
'мх ' => 0,
|
|
|
|
|
'мц ' => 0,
|
|
|
|
|
'мч ' => 0,
|
|
|
|
|
'мш ' => 0,
|
|
|
|
|
'мщ ' => 0,
|
|
|
|
|
'мъ' => 0,
|
|
|
|
|
'мъ ' => 0,
|
|
|
|
|
'мэ ' => 0,
|
|
|
|
|
'мю ' => 0,
|
|
|
|
|
'нё ' => 0,
|
|
|
|
|
'нб ' => 0,
|
|
|
|
|
'нв ' => 0,
|
|
|
|
|
'нй' => 0,
|
|
|
|
|
'нл ' => 0,
|
|
|
|
|
'нп ' => 0,
|
|
|
|
|
'нщ ' => 0,
|
|
|
|
|
'нъ ' => 0,
|
|
|
|
|
'нэ ' => 0,
|
|
|
|
|
'оъ' => 0,
|
|
|
|
|
'оъ ' => 0,
|
|
|
|
|
'оы' => 0,
|
|
|
|
|
'оы ' => 0,
|
|
|
|
|
'оь' => 0,
|
|
|
|
|
'оь ' => 0,
|
|
|
|
|
'пё ' => 0,
|
|
|
|
|
'пб ' => 0,
|
|
|
|
|
'пв' => 0,
|
|
|
|
|
'пв ' => 0,
|
|
|
|
|
'пг' => 0,
|
|
|
|
|
'пг ' => 0,
|
|
|
|
|
'пд ' => 0,
|
|
|
|
|
'пж' => 0,
|
|
|
|
|
'пж ' => 0,
|
|
|
|
|
'пз' => 0,
|
|
|
|
|
'пз ' => 0,
|
|
|
|
|
'пй' => 0,
|
|
|
|
|
'пй ' => 0,
|
|
|
|
|
'пк ' => 0,
|
|
|
|
|
'пл ' => 0,
|
|
|
|
|
'пм ' => 0,
|
|
|
|
|
'пн ' => 0,
|
|
|
|
|
'пф ' => 0,
|
|
|
|
|
'пх ' => 0,
|
|
|
|
|
'пц ' => 0,
|
|
|
|
|
'пч ' => 0,
|
|
|
|
|
'пш ' => 0,
|
|
|
|
|
'пщ ' => 0,
|
|
|
|
|
'пъ' => 0,
|
|
|
|
|
'пъ ' => 0,
|
|
|
|
|
'пэ' => 0,
|
|
|
|
|
'пэ ' => 0,
|
|
|
|
|
'пю ' => 0,
|
|
|
|
|
'рё ' => 0,
|
|
|
|
|
'рй' => 0,
|
|
|
|
|
'рй ' => 0,
|
|
|
|
|
'ръ' => 0,
|
|
|
|
|
'ръ ' => 0,
|
|
|
|
|
'рэ ' => 0,
|
|
|
|
|
'сб ' => 0,
|
|
|
|
|
'св ' => 0,
|
|
|
|
|
'сг ' => 0,
|
|
|
|
|
'сд ' => 0,
|
|
|
|
|
'сж ' => 0,
|
|
|
|
|
'сз' => 0,
|
|
|
|
|
'сз ' => 0,
|
|
|
|
|
'сй' => 0,
|
|
|
|
|
'сй ' => 0,
|
|
|
|
|
'сн ' => 0,
|
|
|
|
|
'сп ' => 0,
|
|
|
|
|
'сф ' => 0,
|
|
|
|
|
'сц ' => 0,
|
|
|
|
|
'сч ' => 0,
|
|
|
|
|
'сш ' => 0,
|
|
|
|
|
'сщ ' => 0,
|
|
|
|
|
'съ ' => 0,
|
|
|
|
|
'сэ ' => 0,
|
|
|
|
|
'тб ' => 0,
|
|
|
|
|
'тг ' => 0,
|
|
|
|
|
'тд ' => 0,
|
|
|
|
|
'тж ' => 0,
|
|
|
|
|
'тз ' => 0,
|
|
|
|
|
'тй' => 0,
|
|
|
|
|
'тй ' => 0,
|
|
|
|
|
'тк ' => 0,
|
|
|
|
|
'тл ' => 0,
|
|
|
|
|
'тп ' => 0,
|
|
|
|
|
'тф ' => 0,
|
|
|
|
|
'тх ' => 0,
|
|
|
|
|
'тц ' => 0,
|
|
|
|
|
'тш ' => 0,
|
|
|
|
|
'тщ ' => 0,
|
|
|
|
|
'тъ ' => 0,
|
|
|
|
|
'уё ' => 0,
|
|
|
|
|
'уо ' => 0,
|
|
|
|
|
'уу ' => 0,
|
|
|
|
|
'уц ' => 0,
|
|
|
|
|
'уъ' => 0,
|
|
|
|
|
'уъ ' => 0,
|
|
|
|
|
'уы' => 0,
|
|
|
|
|
'уы ' => 0,
|
|
|
|
|
'уь' => 0,
|
|
|
|
|
'уь ' => 0,
|
|
|
|
|
'уэ ' => 0,
|
|
|
|
|
'фё ' => 0,
|
|
|
|
|
'фб ' => 0,
|
|
|
|
|
'фв ' => 0,
|
|
|
|
|
'фг ' => 0,
|
|
|
|
|
'фд ' => 0,
|
|
|
|
|
'фж' => 0,
|
|
|
|
|
'фж ' => 0,
|
|
|
|
|
'фз' => 0,
|
|
|
|
|
'фз ' => 0,
|
|
|
|
|
'фй' => 0,
|
|
|
|
|
'фй ' => 0,
|
|
|
|
|
'фк ' => 0,
|
|
|
|
|
'фл ' => 0,
|
|
|
|
|
'фн ' => 0,
|
|
|
|
|
'фп' => 0,
|
|
|
|
|
'фп ' => 0,
|
|
|
|
|
'фс ' => 0,
|
|
|
|
|
'фх' => 0,
|
|
|
|
|
'фх ' => 0,
|
|
|
|
|
'фц' => 0,
|
|
|
|
|
'фц ' => 0,
|
|
|
|
|
'фч ' => 0,
|
|
|
|
|
'фш ' => 0,
|
|
|
|
|
'фщ ' => 0,
|
|
|
|
|
'фъ' => 0,
|
|
|
|
|
'фъ ' => 0,
|
|
|
|
|
'фэ' => 0,
|
|
|
|
|
'фэ ' => 0,
|
|
|
|
|
'фю ' => 0,
|
|
|
|
|
'хё' => 0,
|
|
|
|
|
'хё ' => 0,
|
|
|
|
|
'хб ' => 0,
|
|
|
|
|
'хг ' => 0,
|
|
|
|
|
'хд ' => 0,
|
|
|
|
|
'хж ' => 0,
|
|
|
|
|
'хз ' => 0,
|
|
|
|
|
'хй' => 0,
|
|
|
|
|
'хй ' => 0,
|
|
|
|
|
'хк ' => 0,
|
|
|
|
|
'хн ' => 0,
|
|
|
|
|
'хп ' => 0,
|
|
|
|
|
'хр ' => 0,
|
|
|
|
|
'хс ' => 0,
|
|
|
|
|
'хф ' => 0,
|
|
|
|
|
'хх ' => 0,
|
|
|
|
|
'хц ' => 0,
|
|
|
|
|
'хч ' => 0,
|
|
|
|
|
'хш ' => 0,
|
|
|
|
|
'хщ' => 0,
|
|
|
|
|
'хщ ' => 0,
|
|
|
|
|
'хъ ' => 0,
|
|
|
|
|
'хы' => 0,
|
|
|
|
|
'хы ' => 0,
|
|
|
|
|
'хь' => 0,
|
|
|
|
|
'хь ' => 0,
|
|
|
|
|
'хэ ' => 0,
|
|
|
|
|
'хю' => 0,
|
|
|
|
|
'хю ' => 0,
|
|
|
|
|
'хя' => 0,
|
|
|
|
|
'хя ' => 0,
|
|
|
|
|
'цё' => 0,
|
|
|
|
|
'цё ' => 0,
|
|
|
|
|
'цб' => 0,
|
|
|
|
|
'цб ' => 0,
|
|
|
|
|
'цв ' => 0,
|
|
|
|
|
'цг ' => 0,
|
|
|
|
|
'цд ' => 0,
|
|
|
|
|
'цж' => 0,
|
|
|
|
|
'цж ' => 0,
|
|
|
|
|
'цз ' => 0,
|
|
|
|
|
'цй' => 0,
|
|
|
|
|
'цй ' => 0,
|
|
|
|
|
'цк ' => 0,
|
|
|
|
|
'цл ' => 0,
|
|
|
|
|
'цм ' => 0,
|
|
|
|
|
'цн ' => 0,
|
|
|
|
|
'цп ' => 0,
|
|
|
|
|
'цр ' => 0,
|
|
|
|
|
'цс ' => 0,
|
|
|
|
|
'цт ' => 0,
|
|
|
|
|
'цф' => 0,
|
|
|
|
|
'цф ' => 0,
|
|
|
|
|
'цх' => 0,
|
|
|
|
|
'цх ' => 0,
|
|
|
|
|
'цц ' => 0,
|
|
|
|
|
'цч' => 0,
|
|
|
|
|
'цч ' => 0,
|
|
|
|
|
'цш ' => 0,
|
|
|
|
|
'цщ' => 0,
|
|
|
|
|
'цщ ' => 0,
|
|
|
|
|
'цъ' => 0,
|
|
|
|
|
'цъ ' => 0,
|
|
|
|
|
'ць' => 0,
|
|
|
|
|
'ць ' => 0,
|
|
|
|
|
'цэ' => 0,
|
|
|
|
|
'цэ ' => 0,
|
|
|
|
|
'цю' => 0,
|
|
|
|
|
'цю ' => 0,
|
|
|
|
|
'ця' => 0,
|
|
|
|
|
'ця ' => 0,
|
|
|
|
|
'чё ' => 0,
|
|
|
|
|
'чб ' => 0,
|
|
|
|
|
'чг' => 0,
|
|
|
|
|
'чг ' => 0,
|
|
|
|
|
'чд' => 0,
|
|
|
|
|
'чд ' => 0,
|
|
|
|
|
'чж ' => 0,
|
|
|
|
|
'чз' => 0,
|
|
|
|
|
'чз ' => 0,
|
|
|
|
|
'чй' => 0,
|
|
|
|
|
'чй ' => 0,
|
|
|
|
|
'чк ' => 0,
|
|
|
|
|
'чл ' => 0,
|
|
|
|
|
'чм ' => 0,
|
|
|
|
|
'чн ' => 0,
|
|
|
|
|
'чп' => 0,
|
|
|
|
|
'чп ' => 0,
|
|
|
|
|
'чр ' => 0,
|
|
|
|
|
'чс ' => 0,
|
|
|
|
|
'чф' => 0,
|
|
|
|
|
'чф ' => 0,
|
|
|
|
|
'чх ' => 0,
|
|
|
|
|
'чц ' => 0,
|
|
|
|
|
'чч ' => 0,
|
|
|
|
|
'чш ' => 0,
|
|
|
|
|
'чщ' => 0,
|
|
|
|
|
'чщ ' => 0,
|
|
|
|
|
'чъ' => 0,
|
|
|
|
|
'чъ ' => 0,
|
|
|
|
|
'чы' => 0,
|
|
|
|
|
'чы ' => 0,
|
|
|
|
|
'чэ' => 0,
|
|
|
|
|
'чэ ' => 0,
|
|
|
|
|
'чю' => 0,
|
|
|
|
|
'чю ' => 0,
|
|
|
|
|
'чя' => 0,
|
|
|
|
|
'чя ' => 0,
|
|
|
|
|
'шё ' => 0,
|
|
|
|
|
'шб ' => 0,
|
|
|
|
|
'шг ' => 0,
|
|
|
|
|
'шд' => 0,
|
|
|
|
|
'шд ' => 0,
|
|
|
|
|
'шж' => 0,
|
|
|
|
|
'шж ' => 0,
|
|
|
|
|
'шз' => 0,
|
|
|
|
|
'шз ' => 0,
|
|
|
|
|
'шй' => 0,
|
|
|
|
|
'шй ' => 0,
|
|
|
|
|
'шк ' => 0,
|
|
|
|
|
'шл ' => 0,
|
|
|
|
|
'шм ' => 0,
|
|
|
|
|
'шн ' => 0,
|
|
|
|
|
'шп ' => 0,
|
|
|
|
|
'шр ' => 0,
|
|
|
|
|
'шс ' => 0,
|
|
|
|
|
'шф ' => 0,
|
|
|
|
|
'шх' => 0,
|
|
|
|
|
'шх ' => 0,
|
|
|
|
|
'шч ' => 0,
|
|
|
|
|
'шш' => 0,
|
|
|
|
|
'шш ' => 0,
|
|
|
|
|
'шщ' => 0,
|
|
|
|
|
'шщ ' => 0,
|
|
|
|
|
'шъ' => 0,
|
|
|
|
|
'шъ ' => 0,
|
|
|
|
|
'шы' => 0,
|
|
|
|
|
'шы ' => 0,
|
|
|
|
|
'шэ' => 0,
|
|
|
|
|
'шэ ' => 0,
|
|
|
|
|
'шя' => 0,
|
|
|
|
|
'шя ' => 0,
|
|
|
|
|
'щб' => 0,
|
|
|
|
|
'щб ' => 0,
|
|
|
|
|
'щв ' => 0,
|
|
|
|
|
'щг' => 0,
|
|
|
|
|
'щг ' => 0,
|
|
|
|
|
'щд' => 0,
|
|
|
|
|
'щд ' => 0,
|
|
|
|
|
'щж' => 0,
|
|
|
|
|
'щж ' => 0,
|
|
|
|
|
'щз' => 0,
|
|
|
|
|
'щз ' => 0,
|
|
|
|
|
'щй' => 0,
|
|
|
|
|
'щй ' => 0,
|
|
|
|
|
'щк' => 0,
|
|
|
|
|
'щк ' => 0,
|
|
|
|
|
'щл' => 0,
|
|
|
|
|
'щл ' => 0,
|
|
|
|
|
'щм ' => 0,
|
|
|
|
|
'щн ' => 0,
|
|
|
|
|
'щп' => 0,
|
|
|
|
|
'щп ' => 0,
|
|
|
|
|
'щр ' => 0,
|
|
|
|
|
'щс' => 0,
|
|
|
|
|
'щс ' => 0,
|
|
|
|
|
'щт' => 0,
|
|
|
|
|
'щт ' => 0,
|
|
|
|
|
'щф' => 0,
|
|
|
|
|
'щф ' => 0,
|
|
|
|
|
'щх' => 0,
|
|
|
|
|
'щх ' => 0,
|
|
|
|
|
'щц' => 0,
|
|
|
|
|
'щц ' => 0,
|
|
|
|
|
'щч' => 0,
|
|
|
|
|
'щч ' => 0,
|
|
|
|
|
'щш' => 0,
|
|
|
|
|
'щш ' => 0,
|
|
|
|
|
'щщ' => 0,
|
|
|
|
|
'щщ ' => 0,
|
|
|
|
|
'щъ' => 0,
|
|
|
|
|
'щъ ' => 0,
|
|
|
|
|
'щы' => 0,
|
|
|
|
|
'щы ' => 0,
|
|
|
|
|
'щэ' => 0,
|
|
|
|
|
'щэ ' => 0,
|
|
|
|
|
'щю' => 0,
|
|
|
|
|
'щю ' => 0,
|
|
|
|
|
'щя' => 0,
|
|
|
|
|
'щя ' => 0,
|
|
|
|
|
'ъё ' => 0,
|
|
|
|
|
'ъа' => 0,
|
|
|
|
|
'ъа ' => 0,
|
|
|
|
|
'ъб' => 0,
|
|
|
|
|
'ъб ' => 0,
|
|
|
|
|
'ъв' => 0,
|
|
|
|
|
'ъв ' => 0,
|
|
|
|
|
'ъг' => 0,
|
|
|
|
|
'ъг ' => 0,
|
|
|
|
|
'ъд' => 0,
|
|
|
|
|
'ъд ' => 0,
|
|
|
|
|
'ъе ' => 0,
|
|
|
|
|
'ъж' => 0,
|
|
|
|
|
'ъж ' => 0,
|
|
|
|
|
'ъз' => 0,
|
|
|
|
|
'ъз ' => 0,
|
|
|
|
|
'ъи' => 0,
|
|
|
|
|
'ъи ' => 0,
|
|
|
|
|
'ъй' => 0,
|
|
|
|
|
'ъй ' => 0,
|
|
|
|
|
'ък' => 0,
|
|
|
|
|
'ък ' => 0,
|
|
|
|
|
'ъл' => 0,
|
|
|
|
|
'ъл ' => 0,
|
|
|
|
|
'ъм' => 0,
|
|
|
|
|
'ъм ' => 0,
|
|
|
|
|
'ън' => 0,
|
|
|
|
|
'ън ' => 0,
|
|
|
|
|
'ъо' => 0,
|
|
|
|
|
'ъо ' => 0,
|
|
|
|
|
'ъп' => 0,
|
|
|
|
|
'ъп ' => 0,
|
|
|
|
|
'ър' => 0,
|
|
|
|
|
'ър ' => 0,
|
|
|
|
|
'ъс' => 0,
|
|
|
|
|
'ъс ' => 0,
|
|
|
|
|
'ът' => 0,
|
|
|
|
|
'ът ' => 0,
|
|
|
|
|
'ъу' => 0,
|
|
|
|
|
'ъу ' => 0,
|
|
|
|
|
'ъф' => 0,
|
|
|
|
|
'ъф ' => 0,
|
|
|
|
|
'ъх' => 0,
|
|
|
|
|
'ъх ' => 0,
|
|
|
|
|
'ъц' => 0,
|
|
|
|
|
'ъц ' => 0,
|
|
|
|
|
'ъч' => 0,
|
|
|
|
|
'ъч ' => 0,
|
|
|
|
|
'ъш' => 0,
|
|
|
|
|
'ъш ' => 0,
|
|
|
|
|
'ъщ' => 0,
|
|
|
|
|
'ъщ ' => 0,
|
|
|
|
|
'ъъ' => 0,
|
|
|
|
|
'ъъ ' => 0,
|
|
|
|
|
'ъы' => 0,
|
|
|
|
|
'ъы ' => 0,
|
|
|
|
|
'ъь' => 0,
|
|
|
|
|
'ъь ' => 0,
|
|
|
|
|
'ъэ' => 0,
|
|
|
|
|
'ъэ ' => 0,
|
|
|
|
|
'ъю ' => 0,
|
|
|
|
|
'ъя ' => 0,
|
|
|
|
|
'ыё' => 0,
|
|
|
|
|
'ыё ' => 0,
|
|
|
|
|
'ыа' => 0,
|
|
|
|
|
'ыа ' => 0,
|
|
|
|
|
'ыи ' => 0,
|
|
|
|
|
'ыо ' => 0,
|
|
|
|
|
'ыу ' => 0,
|
|
|
|
|
'ыф ' => 0,
|
|
|
|
|
'ыъ' => 0,
|
|
|
|
|
'ыъ ' => 0,
|
|
|
|
|
'ыы' => 0,
|
|
|
|
|
'ыы ' => 0,
|
|
|
|
|
'ыь' => 0,
|
|
|
|
|
'ыь ' => 0,
|
|
|
|
|
'ыэ' => 0,
|
|
|
|
|
'ыэ ' => 0,
|
|
|
|
|
'ыю ' => 0,
|
|
|
|
|
'ьа' => 0,
|
|
|
|
|
'ьа ' => 0,
|
|
|
|
|
'ьв ' => 0,
|
|
|
|
|
'ьг ' => 0,
|
|
|
|
|
'ьж ' => 0,
|
|
|
|
|
'ьз ' => 0,
|
|
|
|
|
'ьй' => 0,
|
|
|
|
|
'ьй ' => 0,
|
|
|
|
|
'ьл ' => 0,
|
|
|
|
|
'ьн ' => 0,
|
|
|
|
|
'ьр ' => 0,
|
|
|
|
|
'ьу' => 0,
|
|
|
|
|
'ьу ' => 0,
|
|
|
|
|
'ьх ' => 0,
|
|
|
|
|
'ьщ ' => 0,
|
|
|
|
|
'ьъ' => 0,
|
|
|
|
|
'ьъ ' => 0,
|
|
|
|
|
'ьы ' => 0,
|
|
|
|
|
'ьь' => 0,
|
|
|
|
|
'ьь ' => 0,
|
|
|
|
|
'ьэ ' => 0,
|
|
|
|
|
'эё' => 0,
|
|
|
|
|
'эё ' => 0,
|
|
|
|
|
'эа' => 0,
|
|
|
|
|
'эа ' => 0,
|
|
|
|
|
'эб' => 0,
|
|
|
|
|
'эб ' => 0,
|
|
|
|
|
'эв ' => 0,
|
|
|
|
|
'эг ' => 0,
|
|
|
|
|
'эд ' => 0,
|
|
|
|
|
'эе' => 0,
|
|
|
|
|
'эе ' => 0,
|
|
|
|
|
'эж' => 0,
|
|
|
|
|
'эж ' => 0,
|
|
|
|
|
'эз ' => 0,
|
|
|
|
|
'эи ' => 0,
|
|
|
|
|
'эй ' => 0,
|
|
|
|
|
'эл ' => 0,
|
|
|
|
|
'эм ' => 0,
|
|
|
|
|
'эн ' => 0,
|
|
|
|
|
'эо' => 0,
|
|
|
|
|
'эо ' => 0,
|
|
|
|
|
'эу' => 0,
|
|
|
|
|
'эу ' => 0,
|
|
|
|
|
'эф ' => 0,
|
|
|
|
|
'эх ' => 0,
|
|
|
|
|
'эц' => 0,
|
|
|
|
|
'эц ' => 0,
|
|
|
|
|
'эч' => 0,
|
|
|
|
|
'эч ' => 0,
|
|
|
|
|
'эш ' => 0,
|
|
|
|
|
'эщ' => 0,
|
|
|
|
|
'эщ ' => 0,
|
|
|
|
|
'эъ' => 0,
|
|
|
|
|
'эъ ' => 0,
|
|
|
|
|
'эы' => 0,
|
|
|
|
|
'эы ' => 0,
|
|
|
|
|
'эь' => 0,
|
|
|
|
|
'эь ' => 0,
|
|
|
|
|
'ээ ' => 0,
|
|
|
|
|
'эю' => 0,
|
|
|
|
|
'эю ' => 0,
|
|
|
|
|
'эя' => 0,
|
|
|
|
|
'эя ' => 0,
|
|
|
|
|
'юё' => 0,
|
|
|
|
|
'юё ' => 0,
|
|
|
|
|
'юа ' => 0,
|
|
|
|
|
'юе ' => 0,
|
|
|
|
|
'юж ' => 0,
|
|
|
|
|
'юи ' => 0,
|
|
|
|
|
'юл ' => 0,
|
|
|
|
|
'юо ' => 0,
|
|
|
|
|
'юу' => 0,
|
|
|
|
|
'юу ' => 0,
|
|
|
|
|
'юц ' => 0,
|
|
|
|
|
'юъ' => 0,
|
|
|
|
|
'юъ ' => 0,
|
|
|
|
|
'юы' => 0,
|
|
|
|
|
'юы ' => 0,
|
|
|
|
|
'юь' => 0,
|
|
|
|
|
'юь ' => 0,
|
|
|
|
|
'юэ ' => 0,
|
|
|
|
|
'юя' => 0,
|
|
|
|
|
'яё' => 0,
|
|
|
|
|
'яё ' => 0,
|
|
|
|
|
'яа' => 0,
|
|
|
|
|
'яа ' => 0,
|
|
|
|
|
'яе ' => 0,
|
|
|
|
|
'яо ' => 0,
|
|
|
|
|
'яф' => 0,
|
|
|
|
|
'яф ' => 0,
|
|
|
|
|
'яъ' => 0,
|
|
|
|
|
'яъ ' => 0,
|
|
|
|
|
'яы' => 0,
|
|
|
|
|
'яы ' => 0,
|
|
|
|
|
'яь' => 0,
|
|
|
|
|
'яь ' => 0,
|
|
|
|
|
'яэ' => 0,
|
|
|
|
|
'яэ ' => 0,
|
|
|
|
|
#en
|
|
|
|
|
' \'f' => 0,
|
|
|
|
|
' \'p' => 0,
|
|
|
|
|
' \'q' => 0,
|
|
|
|
|
' \'r' => 0,
|
|
|
|
|
' \'x' => 0,
|
|
|
|
|
' \'y' => 0,
|
|
|
|
|
' \'z' => 0,
|
|
|
|
|
' bj' => 0,
|
|
|
|
|
' bq' => 0,
|
|
|
|
|
' bz' => 0,
|
|
|
|
|
' c\'' => 0,
|
|
|
|
|
' cq' => 0,
|
|
|
|
|
' cv' => 0,
|
|
|
|
|
' cx' => 0,
|
|
|
|
|
' dq' => 0,
|
|
|
|
|
' dx' => 0,
|
|
|
|
|
' ez' => 0,
|
|
|
|
|
' f\'' => 0,
|
|
|
|
|
' fh' => 0,
|
|
|
|
|
' fk' => 0,
|
|
|
|
|
' fq' => 0,
|
|
|
|
|
' fv' => 0,
|
|
|
|
|
' fw' => 0,
|
|
|
|
|
' fz' => 0,
|
|
|
|
|
' g\'' => 0,
|
|
|
|
|
' gf' => 0,
|
|
|
|
|
' gg' => 0,
|
|
|
|
|
' gj' => 0,
|
|
|
|
|
' gv' => 0,
|
|
|
|
|
' gx' => 0,
|
|
|
|
|
' gz' => 0,
|
|
|
|
|
' h\'' => 0,
|
|
|
|
|
' hj' => 0,
|
|
|
|
|
' hk' => 0,
|
|
|
|
|
' hn' => 0,
|
|
|
|
|
' hq' => 0,
|
|
|
|
|
' hx' => 0,
|
|
|
|
|
' iq' => 0,
|
|
|
|
|
' iw' => 0,
|
|
|
|
|
' iy' => 0,
|
|
|
|
|
' jb' => 0,
|
|
|
|
|
' jf' => 0,
|
|
|
|
|
' jh' => 0,
|
|
|
|
|
' jj' => 0,
|
|
|
|
|
' jk' => 0,
|
|
|
|
|
' jl' => 0,
|
|
|
|
|
' jm' => 0,
|
|
|
|
|
' jq' => 0,
|
|
|
|
|
' jw' => 0,
|
|
|
|
|
' jx' => 0,
|
|
|
|
|
' jy' => 0,
|
|
|
|
|
' jz' => 0,
|
|
|
|
|
' k\'' => 0,
|
|
|
|
|
' kf' => 0,
|
|
|
|
|
' kj' => 0,
|
|
|
|
|
' kq' => 0,
|
|
|
|
|
' kt' => 0,
|
|
|
|
|
' kx' => 0,
|
|
|
|
|
' kz' => 0,
|
|
|
|
|
' lj' => 0,
|
|
|
|
|
' lk' => 0,
|
|
|
|
|
' lq' => 0,
|
|
|
|
|
' lv' => 0,
|
|
|
|
|
' mj' => 0,
|
|
|
|
|
' mq' => 0,
|
|
|
|
|
' mz' => 0,
|
|
|
|
|
' nj' => 0,
|
|
|
|
|
' nk' => 0,
|
|
|
|
|
' nq' => 0,
|
|
|
|
|
' nz' => 0,
|
|
|
|
|
' oq' => 0,
|
|
|
|
|
' pj' => 0,
|
|
|
|
|
' pz' => 0,
|
|
|
|
|
' qb' => 0,
|
|
|
|
|
' qe' => 0,
|
|
|
|
|
' qf' => 0,
|
|
|
|
|
' qg' => 0,
|
|
|
|
|
' qh' => 0,
|
|
|
|
|
' qj' => 0,
|
|
|
|
|
' qk' => 0,
|
|
|
|
|
' qo' => 0,
|
|
|
|
|
' qp' => 0,
|
|
|
|
|
' qs' => 0,
|
|
|
|
|
' qv' => 0,
|
|
|
|
|
' qx' => 0,
|
|
|
|
|
' qy' => 0,
|
|
|
|
|
' qz' => 0,
|
|
|
|
|
' rb' => 0,
|
|
|
|
|
' rk' => 0,
|
|
|
|
|
' rq' => 0,
|
|
|
|
|
' rv' => 0,
|
|
|
|
|
' rx' => 0,
|
|
|
|
|
' rz' => 0,
|
|
|
|
|
' sz' => 0,
|
|
|
|
|
' tf' => 0,
|
|
|
|
|
' tg' => 0,
|
|
|
|
|
' tj' => 0,
|
|
|
|
|
' tq' => 0,
|
|
|
|
|
' u\'' => 0,
|
|
|
|
|
' ue' => 0,
|
|
|
|
|
' uj' => 0,
|
|
|
|
|
' uo' => 0,
|
|
|
|
|
' uq' => 0,
|
|
|
|
|
' uu' => 0,
|
|
|
|
|
' uy' => 0,
|
|
|
|
|
' vb' => 0,
|
|
|
|
|
' vj' => 0,
|
|
|
|
|
' vk' => 0,
|
|
|
|
|
' vn' => 0,
|
|
|
|
|
' vq' => 0,
|
|
|
|
|
' vr' => 0,
|
|
|
|
|
' vv' => 0,
|
|
|
|
|
' vw' => 0,
|
|
|
|
|
' vx' => 0,
|
|
|
|
|
' vy' => 0,
|
|
|
|
|
' vz' => 0,
|
|
|
|
|
' wj' => 0,
|
|
|
|
|
' wl' => 0,
|
|
|
|
|
' wn' => 0,
|
|
|
|
|
' wq' => 0,
|
|
|
|
|
' wx' => 0,
|
|
|
|
|
' wz' => 0,
|
|
|
|
|
' xb' => 0,
|
|
|
|
|
' xf' => 0,
|
|
|
|
|
' xg' => 0,
|
|
|
|
|
' xh' => 0,
|
|
|
|
|
' xj' => 0,
|
|
|
|
|
' xk' => 0,
|
|
|
|
|
' xq' => 0,
|
|
|
|
|
' xt' => 0,
|
|
|
|
|
' xu' => 0,
|
|
|
|
|
' xz' => 0,
|
|
|
|
|
' yf' => 0,
|
|
|
|
|
' yg' => 0,
|
|
|
|
|
' yh' => 0,
|
|
|
|
|
' yj' => 0,
|
|
|
|
|
' yk' => 0,
|
|
|
|
|
' yl' => 0,
|
|
|
|
|
' yn' => 0,
|
|
|
|
|
' yq' => 0,
|
|
|
|
|
' yv' => 0,
|
|
|
|
|
' yx' => 0,
|
|
|
|
|
' yy' => 0,
|
|
|
|
|
' yz' => 0,
|
|
|
|
|
' z\'' => 0,
|
|
|
|
|
' zb' => 0,
|
|
|
|
|
' zc' => 0,
|
|
|
|
|
' zd' => 0,
|
|
|
|
|
' zf' => 0,
|
|
|
|
|
' zg' => 0,
|
|
|
|
|
' zh' => 0,
|
|
|
|
|
' zj' => 0,
|
|
|
|
|
' zk' => 0,
|
|
|
|
|
' zl' => 0,
|
|
|
|
|
' zm' => 0,
|
|
|
|
|
' zq' => 0,
|
|
|
|
|
' zr' => 0,
|
|
|
|
|
' zv' => 0,
|
|
|
|
|
' zw' => 0,
|
|
|
|
|
' zx' => 0,
|
|
|
|
|
' zz' => 0,
|
|
|
|
|
'\'a ' => 0,
|
|
|
|
|
'\'b' => 0,
|
|
|
|
|
'\'b ' => 0,
|
|
|
|
|
'\'c ' => 0,
|
|
|
|
|
'\'f' => 0,
|
|
|
|
|
'\'f ' => 0,
|
|
|
|
|
'\'g' => 0,
|
|
|
|
|
'\'g ' => 0,
|
|
|
|
|
'\'h ' => 0,
|
|
|
|
|
'\'i ' => 0,
|
|
|
|
|
'\'j' => 0,
|
|
|
|
|
'\'j ' => 0,
|
|
|
|
|
'\'k' => 0,
|
|
|
|
|
'\'k ' => 0,
|
|
|
|
|
'\'l ' => 0,
|
|
|
|
|
'\'n ' => 0,
|
|
|
|
|
'\'o ' => 0,
|
|
|
|
|
'\'p ' => 0,
|
|
|
|
|
'\'q' => 0,
|
|
|
|
|
'\'q ' => 0,
|
|
|
|
|
'\'r ' => 0,
|
|
|
|
|
'\'u' => 0,
|
|
|
|
|
'\'u ' => 0,
|
|
|
|
|
'\'v ' => 0,
|
|
|
|
|
'\'w ' => 0,
|
|
|
|
|
'\'x' => 0,
|
|
|
|
|
'\'x ' => 0,
|
|
|
|
|
'\'z' => 0,
|
|
|
|
|
'\'z ' => 0,
|
|
|
|
|
'b\' ' => 0,
|
|
|
|
|
'bg ' => 0,
|
|
|
|
|
'bh ' => 0,
|
|
|
|
|
'bp ' => 0,
|
|
|
|
|
'bq' => 0,
|
|
|
|
|
'bq ' => 0,
|
|
|
|
|
'bv ' => 0,
|
|
|
|
|
'bx' => 0,
|
|
|
|
|
'bz' => 0,
|
|
|
|
|
'bz ' => 0,
|
|
|
|
|
'c\' ' => 0,
|
|
|
|
|
'cf ' => 0,
|
|
|
|
|
'cj' => 0,
|
|
|
|
|
'cn ' => 0,
|
|
|
|
|
'cq ' => 0,
|
|
|
|
|
'cv' => 0,
|
|
|
|
|
'cw' => 0,
|
|
|
|
|
'cx' => 0,
|
|
|
|
|
'cx ' => 0,
|
|
|
|
|
'cz ' => 0,
|
|
|
|
|
'db ' => 0,
|
|
|
|
|
'dj ' => 0,
|
|
|
|
|
'dk ' => 0,
|
|
|
|
|
'dw ' => 0,
|
|
|
|
|
'dx' => 0,
|
|
|
|
|
'eh ' => 0,
|
|
|
|
|
'ej ' => 0,
|
|
|
|
|
'f\' ' => 0,
|
|
|
|
|
'fg ' => 0,
|
|
|
|
|
'fh ' => 0,
|
|
|
|
|
'fj' => 0,
|
|
|
|
|
'fj ' => 0,
|
|
|
|
|
'fk' => 0,
|
|
|
|
|
'fk ' => 0,
|
|
|
|
|
'fq' => 0,
|
|
|
|
|
'fq ' => 0,
|
|
|
|
|
'fv ' => 0,
|
|
|
|
|
'fw ' => 0,
|
|
|
|
|
'fx' => 0,
|
|
|
|
|
'fx ' => 0,
|
|
|
|
|
'fz' => 0,
|
|
|
|
|
'fz ' => 0,
|
|
|
|
|
'g\' ' => 0,
|
|
|
|
|
'gc ' => 0,
|
|
|
|
|
'gf ' => 0,
|
|
|
|
|
'gj ' => 0,
|
|
|
|
|
'gk ' => 0,
|
|
|
|
|
'gl ' => 0,
|
|
|
|
|
'gq' => 0,
|
|
|
|
|
'gq ' => 0,
|
|
|
|
|
'gv' => 0,
|
|
|
|
|
'gv ' => 0,
|
|
|
|
|
'gw ' => 0,
|
|
|
|
|
'gx' => 0,
|
|
|
|
|
'gx ' => 0,
|
|
|
|
|
'gz ' => 0,
|
|
|
|
|
'hb ' => 0,
|
|
|
|
|
'hc ' => 0,
|
|
|
|
|
'hg ' => 0,
|
|
|
|
|
'hh ' => 0,
|
|
|
|
|
'hj' => 0,
|
|
|
|
|
'hj ' => 0,
|
|
|
|
|
'hk ' => 0,
|
|
|
|
|
'hv' => 0,
|
|
|
|
|
'hv ' => 0,
|
|
|
|
|
'hw ' => 0,
|
|
|
|
|
'hx' => 0,
|
|
|
|
|
'hx ' => 0,
|
|
|
|
|
'hz' => 0,
|
|
|
|
|
'i\' ' => 0,
|
|
|
|
|
'ih ' => 0,
|
|
|
|
|
'iq ' => 0,
|
|
|
|
|
'iw ' => 0,
|
|
|
|
|
'j\'' => 0,
|
|
|
|
|
'j\' ' => 0,
|
|
|
|
|
'jb' => 0,
|
|
|
|
|
'jb ' => 0,
|
|
|
|
|
'jc' => 0,
|
|
|
|
|
'jc ' => 0,
|
|
|
|
|
'jd' => 0,
|
|
|
|
|
'jf' => 0,
|
|
|
|
|
'jg' => 0,
|
|
|
|
|
'jg ' => 0,
|
|
|
|
|
'jh' => 0,
|
|
|
|
|
'jh ' => 0,
|
|
|
|
|
'jj' => 0,
|
|
|
|
|
'jj ' => 0,
|
|
|
|
|
'jk ' => 0,
|
|
|
|
|
'jl ' => 0,
|
|
|
|
|
'jm' => 0,
|
|
|
|
|
'jm ' => 0,
|
|
|
|
|
'jn' => 0,
|
|
|
|
|
'jn ' => 0,
|
|
|
|
|
'jp ' => 0,
|
|
|
|
|
'jq' => 0,
|
|
|
|
|
'jq ' => 0,
|
|
|
|
|
'jr' => 0,
|
|
|
|
|
'jr ' => 0,
|
|
|
|
|
'js' => 0,
|
|
|
|
|
'js ' => 0,
|
|
|
|
|
'jt' => 0,
|
|
|
|
|
'ju ' => 0,
|
|
|
|
|
'jv' => 0,
|
|
|
|
|
'jv ' => 0,
|
|
|
|
|
'jw' => 0,
|
|
|
|
|
'jw ' => 0,
|
|
|
|
|
'jx' => 0,
|
|
|
|
|
'jx ' => 0,
|
|
|
|
|
'jy' => 0,
|
|
|
|
|
'jy ' => 0,
|
|
|
|
|
'jz' => 0,
|
|
|
|
|
'jz ' => 0,
|
|
|
|
|
'kb ' => 0,
|
|
|
|
|
'kc ' => 0,
|
|
|
|
|
'kd ' => 0,
|
|
|
|
|
'kj ' => 0,
|
|
|
|
|
'km ' => 0,
|
|
|
|
|
'kp ' => 0,
|
|
|
|
|
'kq' => 0,
|
|
|
|
|
'kq ' => 0,
|
|
|
|
|
'kv' => 0,
|
|
|
|
|
'kv ' => 0,
|
|
|
|
|
'kx' => 0,
|
|
|
|
|
'kx ' => 0,
|
|
|
|
|
'kz' => 0,
|
|
|
|
|
'kz ' => 0,
|
|
|
|
|
'lg ' => 0,
|
|
|
|
|
'lh ' => 0,
|
|
|
|
|
'lj ' => 0,
|
|
|
|
|
'lq ' => 0,
|
|
|
|
|
'lr ' => 0,
|
|
|
|
|
'lv ' => 0,
|
|
|
|
|
'lw ' => 0,
|
|
|
|
|
'lx' => 0,
|
|
|
|
|
'lz ' => 0,
|
|
|
|
|
'm\' ' => 0,
|
|
|
|
|
'mg ' => 0,
|
|
|
|
|
'mh ' => 0,
|
|
|
|
|
'mj ' => 0,
|
|
|
|
|
'mk ' => 0,
|
|
|
|
|
'mq' => 0,
|
|
|
|
|
'mq ' => 0,
|
|
|
|
|
'mx' => 0,
|
|
|
|
|
'mx ' => 0,
|
|
|
|
|
'mz' => 0,
|
|
|
|
|
'nb ' => 0,
|
|
|
|
|
'nm ' => 0,
|
|
|
|
|
'pj ' => 0,
|
|
|
|
|
'pk ' => 0,
|
|
|
|
|
'pq' => 0,
|
|
|
|
|
'pq ' => 0,
|
|
|
|
|
'pv' => 0,
|
|
|
|
|
'pw ' => 0,
|
|
|
|
|
'px' => 0,
|
|
|
|
|
'px ' => 0,
|
|
|
|
|
'pz ' => 0,
|
|
|
|
|
'q\'' => 0,
|
|
|
|
|
'q\' ' => 0,
|
|
|
|
|
'qa ' => 0,
|
|
|
|
|
'qb' => 0,
|
|
|
|
|
'qb ' => 0,
|
|
|
|
|
'qc' => 0,
|
|
|
|
|
'qc ' => 0,
|
|
|
|
|
'qd' => 0,
|
|
|
|
|
'qd ' => 0,
|
|
|
|
|
'qe' => 0,
|
|
|
|
|
'qe ' => 0,
|
|
|
|
|
'qf' => 0,
|
|
|
|
|
'qf ' => 0,
|
|
|
|
|
'qg' => 0,
|
|
|
|
|
'qg ' => 0,
|
|
|
|
|
'qh' => 0,
|
|
|
|
|
'qh ' => 0,
|
|
|
|
|
'qi' => 0,
|
|
|
|
|
'qj' => 0,
|
|
|
|
|
'qj ' => 0,
|
|
|
|
|
'qk' => 0,
|
|
|
|
|
'qk ' => 0,
|
|
|
|
|
'ql' => 0,
|
|
|
|
|
'ql ' => 0,
|
|
|
|
|
'qm' => 0,
|
|
|
|
|
'qm ' => 0,
|
|
|
|
|
'qn' => 0,
|
|
|
|
|
'qn ' => 0,
|
|
|
|
|
'qo' => 0,
|
|
|
|
|
'qo ' => 0,
|
|
|
|
|
'qp' => 0,
|
|
|
|
|
'qp ' => 0,
|
|
|
|
|
'qq' => 0,
|
|
|
|
|
'qq ' => 0,
|
|
|
|
|
'qr' => 0,
|
|
|
|
|
'qs' => 0,
|
|
|
|
|
'qs ' => 0,
|
|
|
|
|
'qt' => 0,
|
|
|
|
|
'qt ' => 0,
|
|
|
|
|
'qu ' => 0,
|
|
|
|
|
'qv' => 0,
|
|
|
|
|
'qv ' => 0,
|
|
|
|
|
'qw' => 0,
|
|
|
|
|
'qw ' => 0,
|
|
|
|
|
'qx' => 0,
|
|
|
|
|
'qx ' => 0,
|
|
|
|
|
'qy' => 0,
|
|
|
|
|
'qy ' => 0,
|
|
|
|
|
'qz' => 0,
|
|
|
|
|
'qz ' => 0,
|
|
|
|
|
'rq ' => 0,
|
|
|
|
|
'rz ' => 0,
|
|
|
|
|
'sg ' => 0,
|
|
|
|
|
'sj ' => 0,
|
|
|
|
|
'sx' => 0,
|
|
|
|
|
'sx ' => 0,
|
|
|
|
|
'sz' => 0,
|
|
|
|
|
'sz ' => 0,
|
|
|
|
|
'tg ' => 0,
|
|
|
|
|
'tj ' => 0,
|
|
|
|
|
'tq' => 0,
|
|
|
|
|
'tq ' => 0,
|
|
|
|
|
'tx' => 0,
|
|
|
|
|
'tx ' => 0,
|
|
|
|
|
'uj ' => 0,
|
|
|
|
|
'uq ' => 0,
|
|
|
|
|
'uu ' => 0,
|
|
|
|
|
'uw ' => 0,
|
|
|
|
|
'v\' ' => 0,
|
|
|
|
|
'vb' => 0,
|
|
|
|
|
'vb ' => 0,
|
|
|
|
|
'vc' => 0,
|
|
|
|
|
'vf' => 0,
|
|
|
|
|
'vf ' => 0,
|
|
|
|
|
'vg' => 0,
|
|
|
|
|
'vh' => 0,
|
|
|
|
|
'vh ' => 0,
|
|
|
|
|
'vj' => 0,
|
|
|
|
|
'vj ' => 0,
|
|
|
|
|
'vk' => 0,
|
|
|
|
|
'vk ' => 0,
|
|
|
|
|
'vl ' => 0,
|
|
|
|
|
'vm' => 0,
|
|
|
|
|
'vn ' => 0,
|
|
|
|
|
'vp' => 0,
|
|
|
|
|
'vp ' => 0,
|
|
|
|
|
'vq' => 0,
|
|
|
|
|
'vq ' => 0,
|
|
|
|
|
'vr ' => 0,
|
|
|
|
|
'vv ' => 0,
|
|
|
|
|
'vw' => 0,
|
|
|
|
|
'vw ' => 0,
|
|
|
|
|
'vx' => 0,
|
|
|
|
|
'vz' => 0,
|
|
|
|
|
'vz ' => 0,
|
|
|
|
|
'w\' ' => 0,
|
|
|
|
|
'wb ' => 0,
|
|
|
|
|
'wc ' => 0,
|
|
|
|
|
'wf ' => 0,
|
|
|
|
|
'wg ' => 0,
|
|
|
|
|
'wj' => 0,
|
|
|
|
|
'wj ' => 0,
|
|
|
|
|
'wq' => 0,
|
|
|
|
|
'wq ' => 0,
|
|
|
|
|
'wr ' => 0,
|
|
|
|
|
'wv' => 0,
|
|
|
|
|
'wv ' => 0,
|
|
|
|
|
'wx' => 0,
|
|
|
|
|
'wz ' => 0,
|
|
|
|
|
'x\'' => 0,
|
|
|
|
|
'x\' ' => 0,
|
|
|
|
|
'xa ' => 0,
|
|
|
|
|
'xb ' => 0,
|
|
|
|
|
'xc ' => 0,
|
|
|
|
|
'xd' => 0,
|
|
|
|
|
'xd ' => 0,
|
|
|
|
|
'xf ' => 0,
|
|
|
|
|
'xg ' => 0,
|
|
|
|
|
'xh ' => 0,
|
|
|
|
|
'xj' => 0,
|
|
|
|
|
'xj ' => 0,
|
|
|
|
|
'xk' => 0,
|
|
|
|
|
'xk ' => 0,
|
|
|
|
|
'xl ' => 0,
|
|
|
|
|
'xm ' => 0,
|
|
|
|
|
'xn' => 0,
|
|
|
|
|
'xn ' => 0,
|
|
|
|
|
'xp ' => 0,
|
|
|
|
|
'xq ' => 0,
|
|
|
|
|
'xr' => 0,
|
|
|
|
|
'xr ' => 0,
|
|
|
|
|
'xs ' => 0,
|
|
|
|
|
'xu ' => 0,
|
|
|
|
|
'xv' => 0,
|
|
|
|
|
'xv ' => 0,
|
|
|
|
|
'xw ' => 0,
|
|
|
|
|
'xx' => 0,
|
|
|
|
|
'xz' => 0,
|
|
|
|
|
'xz ' => 0,
|
|
|
|
|
'yb ' => 0,
|
|
|
|
|
'yc ' => 0,
|
|
|
|
|
'yd ' => 0,
|
|
|
|
|
'yf ' => 0,
|
|
|
|
|
'yg ' => 0,
|
|
|
|
|
'yh ' => 0,
|
|
|
|
|
'yj ' => 0,
|
|
|
|
|
'yq' => 0,
|
|
|
|
|
'yq ' => 0,
|
|
|
|
|
'yu ' => 0,
|
|
|
|
|
'yv ' => 0,
|
|
|
|
|
'yw ' => 0,
|
|
|
|
|
'yy' => 0,
|
|
|
|
|
'yy ' => 0,
|
|
|
|
|
'yz ' => 0,
|
|
|
|
|
'z\'' => 0,
|
|
|
|
|
'z\' ' => 0,
|
|
|
|
|
'zb ' => 0,
|
|
|
|
|
'zc' => 0,
|
|
|
|
|
'zc ' => 0,
|
|
|
|
|
'zd' => 0,
|
|
|
|
|
'zd ' => 0,
|
|
|
|
|
'zf' => 0,
|
|
|
|
|
'zf ' => 0,
|
|
|
|
|
'zg ' => 0,
|
|
|
|
|
'zh' => 0,
|
|
|
|
|
'zh ' => 0,
|
|
|
|
|
'zj' => 0,
|
|
|
|
|
'zj ' => 0,
|
|
|
|
|
'zk ' => 0,
|
|
|
|
|
'zl ' => 0,
|
|
|
|
|
'zn' => 0,
|
|
|
|
|
'zn ' => 0,
|
|
|
|
|
'zp ' => 0,
|
|
|
|
|
'zq' => 0,
|
|
|
|
|
'zq ' => 0,
|
|
|
|
|
'zr' => 0,
|
|
|
|
|
'zr ' => 0,
|
|
|
|
|
'zs' => 0,
|
|
|
|
|
'zs ' => 0,
|
|
|
|
|
'zt' => 0,
|
|
|
|
|
'zt ' => 0,
|
|
|
|
|
'zu ' => 0,
|
|
|
|
|
'zv ' => 0,
|
|
|
|
|
'zw ' => 0,
|
|
|
|
|
'zx' => 0,
|
|
|
|
|
'zx ' => 0,
|
|
|
|
|
);
|
|
|
|
|
|
|
|
|
|
/**
 * Builds the two "look-alike letters" correction patterns and the flipped
 * replacement tables, then registers caller-supplied exception words.
 *
 * @param array|null $words_exceptions Extra exception words grouped by language code,
 *                                     i.e. array(lang => array(word => anything)).
 *                                     Words must be lowercase, because _bigram_exists()
 *                                     lowercases a word before looking it up by key.
 */
public function __construct(array $words_exceptions = null)
{
    #the value returned from a constructor is discarded by `new`; kept as in the original
    if (! ReflectionTypeHint::isValid()) return false;

    #Russian --> English:
    $this->en_correct = '/(?: (?:' . $this->tt_f . ')
                              (?: (?:' . $this->en_uniq . ') | (?:' . $this->en_sc . '){2} )
                            | (?:' . $this->en_sc . ')
                              (?:' . $this->tt_f . ')
                              (?:' . $this->en_sc . ')
                            | (?: (?:' . $this->en_uniq . ') | (?:' . $this->en_sc . '){2} )
                              (?:' . $this->tt_f . ')
                          )
                         /sxSX';

    #English --> Russian:
    $this->tt_correct = '/(?: (?:' . $this->en_sc . ')
                              (?: (?:' . $this->tt_uniq . ') | (?:' . $this->tt_f . '){2} )
                            | (?:' . $this->tt_f . ')
                              (?:' . $this->en_sc . ')
                              (?:' . $this->tt_f . ')
                            | (?: (?:' . $this->tt_uniq . ') | (?:' . $this->tt_f . '){2} )
                              (?:' . $this->en_sc . ')
                          )
                         /sxSX';

    #reverse direction of both replacement tables
    $this->table_flip = array(
        0 => array_flip($this->table[0]),
        1 => array_flip($this->table[1]),
    );

    if (is_array($words_exceptions))
    {
        #NOTE(review): assumes the built-in list is keyed by language code, as
        #_bigram_exists() reads $this->words_exceptions[$lang] -- confirm against the property declaration.
        #A plain `+=` on the outer array would keep the built-in per-language list and silently
        #drop the caller's words for that language, so the lists are merged per language instead.
        foreach ($words_exceptions as $lang => $words)
        {
            if (! array_key_exists($lang, $this->words_exceptions))
            {
                #unknown key: same result as the former array union
                $this->words_exceptions[$lang] = $words;
            }
            elseif (is_array($words) && is_array($this->words_exceptions[$lang]))
            {
                #built-in entries win on duplicate words
                $this->words_exceptions[$lang] += $words;
            }
        }
    }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Corrects keyboard typos in a text.
 *
 * @param scalar|null $s      Text in UTF-8. A non-string value is returned unchanged.
 * @param int         $mode   self::SIMILAR_CHARS and/or self::KEYBOARD_LAYOUT constants
 *                            (they may be combined); see the constants' descriptions.
 *                            With self::KEYBOARD_LAYOUT the run time grows roughly tenfold.
 * @param array       &$words Receives an assoc. array of the corrected words:
 *                            originals in the keys, corrected words in the values.
 * @return string|bool        Returns FALSE if error occurred
 */
public function parse($s, $mode = self::SIMILAR_CHARS, array &$words = null)
{
    if (! ReflectionTypeHint::isValid()) return false;
    if (! is_string($s)) return $s;

    #highest accepted value: all three flags set
    $mode_max = self::SIMILAR_CHARS | self::KEYBOARD_LAYOUT | self::ADD_FIX;
    if ($mode < self::SIMILAR_CHARS || $mode > $mode_max)
    {
        trigger_error('Unknown mode', E_USER_WARNING);
        return false;
    }

    $this->mode = $mode;

    #cut out / replace some characters before the analysis
    $additional_chars = array(
        "\xc2\xad",  #soft hyphens (U+00AD)
    );
    #diacritical marks, see http://ru.wikipedia.org/wiki/Диакритические_знаки
    $is_can_restored = true;
    $s = UTF8::diactrical_remove($s, $additional_chars, $is_can_restored, $restore_table);

    #the callbacks of both passes collect the corrected words here
    $this->words = array();
    $s = $this->_parse1($s);
    $s = $this->_parse2($s);
    $s = UTF8::diactrical_restore($s, $restore_table);

    $words = $this->words;
    return $s;
}
|
|
|
|
|
|
|
|
|
|
/**
 * First pass: hands every run of at least 3 letters / special characters to _word().
 *
 * @param string $s
 * @return string|null  Whatever preg_replace_callback() returns
 */
private function _parse1($s)
{
    #words are replaced starting from 3 characters, never less
    $word_regexp = '/(?> (' . $this->en . ') #1 латинские буквы
                       | (' . $this->tt . ') #2 русские буквы
                       | (' . $this->sc . ') #3 символы, которые м.б. набраны по ошибке в английской раскладке клавиатуры вместо русских букв
                     ){3,}+
                    /sxSX';

    return preg_replace_callback($word_regexp, array($this, '_word'), $s);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Second pass: converts Russian letters that look like Latin ones into Latin
 * when they stand next to digits.
 *
 * Typical input is a catalogue number that contains Russian letters by mistake:
 * 1500A023, 52511-60900-H0, K2305, XA527672.
 * Entries such as '1-ое', 'Ту-134', 'А19-3107/06-43-Ф02-4227/06-С1' are meant to be left alone.
 *
 * @param string $s
 * @return string  Corrected text; the input itself if PCRE reports an error
 */
private function _parse2($s)
{
    if (version_compare(PHP_VERSION, '5.2.0', '<')) return $s;

    #The hyphen is placed FIRST inside the [-\d_/] classes: PCRE2 (PHP >= 7.3) refuses to compile
    #a class where a hyphen follows \d and is not the last character ("invalid range in character class").
    $result = preg_replace_callback('~(?: (?<=[^-_/]|^)
                                          (?:' . $this->ru_similar . ')++
                                          (?= (?:' . $this->en . '|[-_/])*+ (?<=[^-_/]|' . $this->en . '[-_/])
                                              \d [-\d_/]*+ (?!' . $this->tt_uniq . ')
                                          )
                                        | (?<=[^-_/]|^)
                                          \d (?:' . $this->en . '|[-_/])*+ (?<=[^-_/]|' . $this->en . '[-_/])
                                          \K
                                          (?:' . $this->ru_similar . ')++
                                          (?= [-\d_/]*+ (?!' . $this->tt_uniq . ') )
                                      )
                                     ~sxSX', array($this, '_entry'), $s);

    #preg_replace_callback() returns NULL on a PCRE error; keep the text instead of losing it
    return $result === null ? $s : $result;
}
|
|
|
|
|
|
|
|
|
|
/**
 * preg_replace_callback() handler of _parse2(): converts the matched Russian
 * look-alike letters with $this->table[0] and records the correction.
 *
 * The parameter is taken by value: the callback argument is never modified, and a
 * by-reference parameter makes PHP 8 warn on every call made by preg_replace_callback().
 *
 * @param array $a  Match data, $a[0] is the matched entry
 * @return string   Replacement text
 */
private function _entry(array $a)
{
    $entry = $a[0];
    $s = strtr($entry, $this->table[0]);
    #remember only real changes
    if ($s !== $entry) $this->words[$entry] = $s;
    return $s;
}
|
|
|
|
|
|
|
|
|
|
/**
 * preg_replace_callback() handler of _parse1(): decides whether one word has to be
 * corrected and returns its replacement.
 *
 * The parameter is taken by value: the callback argument is never modified, and a
 * by-reference parameter makes PHP 8 warn on every call made by preg_replace_callback().
 *
 * @param array $a  Match data of the _parse1() pattern: $a[0] the word, $a[1] non-empty if
 *                  Latin letters were captured, $a[2] if Russian letters were captured,
 *                  $a[3] if special characters were captured
 * @return string   The word itself or its correction
 */
private function _word(array $a)
{
    $word = $a[0];

    $suggestions = array();

    #the word is a mix of Russian and Latin letters
    if (! empty($a[1]) && ! empty($a[2]))
    {
        if (($this->mode & self::SIMILAR_CHARS) === 0) return $word;
        #ATTENTION! The order of the conversion rules below matters!

        /*
        Fix mistyped letters that look the same in initials placed before
        a surname (Russian <--> English), for example: Т.С.Навка
        */

        #0a. English --> Russian:
        if (substr($word, 1, 1) === '.' #optimization: a Latin initial takes 1 byte
            && preg_match('/^ ( ' . $this->en_similar_uc . '\. #первый инициал
                                (?:' . $this->en_similar_uc . '\.)? #второй инициал (необязательно)
                              ) #1 инициалы
                              (' . $this->no_sc . '{2,}+) #2 фамилия (английские и русские буквы)
                            $/sxSX', $word, $m))
        {
            $m[2] = $this->_parse1($m[2]);
            #is the surname Russian?
            if (preg_match('/^ (?:' . $this->tt_uc . ') #первая буква д.б. большая
                               (?:' . $this->tt_f . ')+ #минимальное кол-во букв в фамилии = 2
                            $/sxSX', $m[2])) return strtr($m[1], $this->table_flip[0]) . $m[2];
        }

        #0b. Russian --> English:
        if (substr($word, 2, 1) === '.' #optimization: a Russian initial takes 2 bytes in UTF-8
            && preg_match('/^ ( ' . $this->ru_similar_uc . '\. #первый инициал
                                (?:' . $this->ru_similar_uc . '\.)? #второй инициал (необязательно)
                              ) #1 инициалы
                              (' . $this->no_sc . '{2,}+) #2 фамилия (английские и русские буквы)
                            $/sxSX', $word, $m))
        {
            $m[2] = $this->_parse1($m[2]);
            #is the surname English?
            if (preg_match('/^ ' . $this->en_uc . ' #первая буква д.б. большая
                               ' . $this->en . '++ #минимальное кол-во букв в фамилии = 2
                            $/sxSX', $m[2])) return strtr($m[1], $this->table[0]) . $m[2];
        }

        #1. English --> Russian:
        $this->method = 0; #letters that look the same
        $this->is_flip = true;
        $s = $this->_replace($word, $this->tt_correct);
        if ($word !== $s && ! $this->_is_mixed($s)) $suggestions['tt0'] = $s;

        #2. English --> Russian:
        $this->method = 1; #letters in the other keyboard layout
        $this->is_flip = true;
        $s = $this->_replace($word, $this->tt_correct);
        if ($word !== $s) $suggestions['tt1'] = $s;

        #3. Russian --> English:
        $this->method = 0; #letters that look the same
        $this->is_flip = false;
        $s = $this->_replace($word, $this->en_correct);
        if ($word !== $s && ! $this->_is_mixed($s)) $suggestions['en0'] = $s;

        #4. Russian --> English:
        $this->method = 1; #letters in the other keyboard layout
        $this->is_flip = false;
        $s = $this->_replace($word, $this->en_correct);
        if ($word !== $s) $suggestions['en1'] = $s;
    }
    #the word has Latin letters only; minimal length is 4 letters!
    elseif (! empty($a[1]) && strlen($word) >= 4)
    {
        if (($this->mode & self::KEYBOARD_LAYOUT) === 0) return $word;

        #abbreviations are not processed, example: AMPAS
        if (preg_match('/^(?:' . $this->en_uc . '|' . $this->sc . '){1,6}+$/sxSX', $word)) return $word;

        #English --> Russian:
        $suggestions['en1'] = $word;
        $suggestions['tt1'] = strtr($word, $this->table_flip[1]);
    }
    #the word has Russian letters only; minimal length is 4 letters (8 bytes of two-byte UTF-8 letters)!
    elseif (! empty($a[2]) && strlen($word) >= 8)
    {
        if (($this->mode & self::KEYBOARD_LAYOUT) === 0) return $word;

        #abbreviations are not processed, example: ДОСААФ
        if (preg_match('/^(?:' . $this->tt_uc . '|' . $this->sc . '){1,6}+$/sxSX', $word)) return $word;

        #Russian --> English:
        $suggestions['tt1'] = $word;
        $suggestions['en1'] = strtr($word, $this->table[1]);
    }
    #only special characters were found or the word is too short
    else return $word;

    $suggestions = array_unique($suggestions);

    $c = count($suggestions);
    if ($c === 0) $s = $word;
    else $s = $this->_detect($word, $suggestions, ! empty($a[3]));

    if ($s !== $word)
    {
        $this->words[$word] = $s;
        #debugging aid: show the original next to its correction
        if ($this->mode >= (self::KEYBOARD_LAYOUT | self::ADD_FIX)) $s = '(' . $word . '=>' . $s . ')';
    }
    return $s;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Applies $regexp with the _strtr() callback over and over until the word stops changing.
 *
 * @param string $word
 * @param string $regexp  One of the patterns built in the constructor
 * @return string
 */
private function _replace($word, $regexp)
{
    while (true)
    {
        $previous = $word;
        $word = preg_replace_callback($regexp, array($this, '_strtr'), $previous);
        #a pass that changed nothing means there is nothing left to convert
        if ($previous === $word) return $word;
    }
}
|
|
|
|
|
|
|
|
|
|
/**
 * preg_replace_callback() handler of _replace(): converts the matched fragment with
 * the table selected by $this->method, flipped when $this->is_flip is set.
 *
 * @param array $a  Match data, $a[0] is the matched fragment
 * @return string
 */
private function _strtr(array $a)
{
    $tables = $this->is_flip ? $this->table_flip : $this->table;
    return strtr($a[0], $tables[$this->method]);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Tells whether the word matches both the Latin pattern ($this->en)
 * and the Russian pattern ($this->tt_f).
 *
 * @param string $word
 * @return bool
 */
private function _is_mixed($word)
{
    if (! preg_match('/(?:' . $this->en . ')/sxSX', $word)) return false;
    return (bool) preg_match('/(?:' . $this->tt_f . ')/sxSX', $word);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Picks one of several suggestions.
 *
 * @param string $word         Original word
 * @param array  $suggestions  Candidates; the first 2 characters of each key are the language code
 * @param bool   $is_sc        TRUE if the original word contained special characters
 * @return string              The chosen suggestion or the original word
 */
private function _detect($word, array $suggestions, $is_sc)
{
    #a candidate must not contain non-existent N-grams
    foreach (array_keys($suggestions) as $type)
    {
        $lang = substr($type, 0, 2);
        if ($this->_bigram_exists($suggestions[$type], $lang)) unset($suggestions[$type]);
    }
    if (count($suggestions) === 0) return $word;

    $sc_regexp = '/' . $this->sc . '/sSX';
    $last = end($suggestions);

    #$word had special characters and the last candidate has none: return that candidate
    if ($is_sc && ! preg_match($sc_regexp, $last)) return $last;

    #the last candidate has more special characters than letters: return $word
    $sc_count = 0;
    $letters = preg_replace($sc_regexp, '', $last, -1, $sc_count);
    if ($sc_count > 0 && $sc_count > UTF8::strlen($letters)) return $word;

    #otherwise the first remaining candidate wins
    return reset($suggestions);
}
|
|
|
|
|
|
|
|
|
|
/**
 * N-gram based analysis for Russian and English.
 *
 * Despite the name, TRUE means the word contains a letter combination listed as
 * non-existent, which makes _detect() discard the candidate.
 *
 * @param string $word
 * @param string $lang  Language code; 'en' selects strtolower(), anything else UTF8::lowercase()
 * @return bool
 */
private function _bigram_exists($word, $lang)
{
    if ($lang === 'en') $word = strtolower($word);
    else $word = UTF8::lowercase($word);

    #step 0.
    #words from the exception list are always accepted
    if (array_key_exists($word, $this->words_exceptions[$lang])) return false;

    #step 1.
    #4 consonants in a row; example: больши{нств}о, юрисконсу{льтс}тво
    if (preg_match('/(?:' . $this->consonant_lc[$lang] . '){4}/sxSX', $word, $m))
    {
        #unless the combination is in the exception list
        if (! array_key_exists($m[0], $this->consonants4_lc[$lang])) return true;
    }

    #step 2.
    #3 vowels in a row; example: длиннош{еее}, зм{еео}бразный
    if (preg_match('/(?:' . $this->vowel_lc[$lang] . '){3}/sxSX', $word, $m))
    {
        #unless the combination is in the exception list
        if (! array_key_exists($m[0], $this->vowels3_lc[$lang])) return true;
    }

    #step 3.
    #position of the last bigram in the word
    $last_pos = UTF8::strlen($word) - 2;
    for ($pos = 0; $pos <= $last_pos; $pos++)
    {
        /*
        TODO The quality of the check against non-existent bigrams could be slightly improved
             by taking into account every bigram position in the word, not only its beginning and end.
        */
        $ss = UTF8::substr($word, $pos, 2);
        if ($pos === 0) $ss = ' ' . $ss;              #beginning of word
        elseif ($pos === $last_pos) $ss = $ss . ' ';  #ending of word
        if (array_key_exists($ss, $this->bigrams)) return true;
    }

    return false;
}
|
2023-11-02 12:12:42 +03:00
|
|
|
|
}
|