21 #include "kcharselectdata_p.h" 
   23 #include <QStringList> 
   26 #include <QtConcurrentRun> 
   40 #define NCount (VCount * TCount) 
   41 #define SCount (LCount * NCount) 
   45         "G", 
"GG", 
"N", 
"D", 
"DD", 
"R", 
"M", 
"B", 
"BB",
 
   46         "S", 
"SS", 
"", 
"J", 
"JJ", 
"C", 
"K", 
"T", 
"P", 
"H" 
   51         "A", 
"AE", 
"YA", 
"YAE", 
"EO", 
"E", 
"YEO", 
"YE", 
"O",
 
   52         "WA", 
"WAE", 
"OE", 
"YO", 
"U", 
"WEO", 
"WE", 
"WI",
 
   58         "", 
"G", 
"GG", 
"GS", 
"N", 
"NJ", 
"NH", 
"D", 
"L", 
"LG", 
"LM",
 
   59         "LB", 
"LS", 
"LT", 
"LP", 
"LH", 
"M", 
"B", 
"BS",
 
   60         "S", 
"SS", 
"NG", 
"J", 
"C", 
"K", 
"T", 
"P", 
"H" 
   63 bool KCharSelectData::openDataFile()
 
   65     if(!dataFile.isEmpty()) {
 
   69         if (!file.open(QIODevice::ReadOnly)) {
 
   72         dataFile = file.readAll();
 
   79 quint32 KCharSelectData::getDetailIndex(
const QChar& c)
 const 
   81     const uchar* data = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
   84     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
 
   85     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
 
   89     int max = ((offsetEnd - offsetBegin) / 27) - 1;
 
   93     static quint16 most_recent_searched;
 
   94     static quint32 most_recent_result;
 
   97     if (unicode == most_recent_searched)
 
   98         return most_recent_result;
 
  100     most_recent_searched = unicode;
 
  103         mid = (min + max) / 2;
 
  104         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
 
  105         if (unicode > midUnicode)
 
  107         else if (unicode < midUnicode)
 
  110             most_recent_result = offsetBegin + mid*27;
 
  112             return most_recent_result;
 
  116     most_recent_result = 0;
 
  120 QString KCharSelectData::formatCode(ushort code, 
int length, 
const QString& prefix, 
int base)
 
  123     while (s.
size() < length)
 
  131     if(!openDataFile()) {
 
  135     const uchar* data = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  136     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
 
  137     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
 
  139     int max = ((offsetEnd - offsetBegin) / 4) - 1;
 
  146     quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
 
  147     quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
 
  149     while(unicodeBegin < unicodeEnd) {
 
  158 QList<int> KCharSelectData::sectionContents(
int section)
 
  160     if(!openDataFile()) {
 
  164     const uchar* data = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  165     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
 
  166     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
 
  168     int max = ((offsetEnd - offsetBegin) / 4) - 1;
 
  175     for(
int i = 0; i <= max; i++) {
 
  176         const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
 
  177         if(currSection == section) {
 
  178             res.
append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
 
  187     if(!openDataFile()) {
 
  191     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  192     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
 
  193     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
 
  195     const char* data = dataFile.constData();
 
  198     while(i < stringEnd) {
 
  199         list.
append(
i18nc(
"KCharSelect section name", data + i));
 
  200         i += strlen(data + i) + 1;
 
  208     return blockName(blockIndex(c));
 
  213     return sectionName(sectionIndex(blockIndex(c)));
 
  218     if(!openDataFile()) {
 
  223     if ((unicode >= 0x3400 && unicode <= 0x4DB5)
 
  224             || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
 
  227     } 
else if (c >= 0xac00 && c <= 0xd7af) {
 
  230         int LIndex, VIndex, TIndex;
 
  232         if (SIndex < 0 || SIndex >= 
SCount)
 
  241     } 
else if (unicode >= 0xD800 && unicode <= 0xDB7F)
 
  242         return i18n(
"<Non Private Use High Surrogate>");
 
  243     else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
 
  244         return i18n(
"<Private Use High Surrogate>");
 
  245     else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
 
  246         return i18n(
"<Low Surrogate>");
 
  247     else if (unicode >= 0xE000 && unicode <= 0xF8FF)
 
  248         return i18n(
"<Private Use>");
 
  254         const uchar* data = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  255         const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
 
  256         const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
 
  260         int max = ((offsetEnd - offsetBegin) / 6) - 1;
 
  264             mid = (min + max) / 2;
 
  265             const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
 
  266             if (unicode > midUnicode)
 
  268             else if (unicode < midUnicode)
 
  271                 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
 
  272                 s = 
QString(dataFile.constData() + offset + 1);
 
  278             return i18n(
"<not assigned>");
 
  285 int KCharSelectData::blockIndex(
const QChar& c)
 
  287     if(!openDataFile()) {
 
  291     const uchar* data = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  292     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
 
  293     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
 
  294     const quint16 unicode = c.
unicode();
 
  296     int max = ((offsetEnd - offsetBegin) / 4) - 1;
 
  300     while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
 
  307 int KCharSelectData::sectionIndex(
int block)
 
  309     if(!openDataFile()) {
 
  313     const uchar* data = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  314     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
 
  315     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
 
  317     int max = ((offsetEnd - offsetBegin) / 4) - 1;
 
  319     for(
int i = 0; i <= max; i++) {
 
  320         if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
 
  321             return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
 
  328 QString KCharSelectData::blockName(
int index)
 
  330     if(!openDataFile()) {
 
  334     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  335     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
 
  336     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
 
  341     const char* data = dataFile.constData();
 
  342     while(i < stringEnd && currIndex < index) {
 
  343         i += strlen(data + i) + 1;
 
  347     return i18nc(
"KCharselect unicode block name", data + i);
 
  350 QString KCharSelectData::sectionName(
int index)
 
  352     if(!openDataFile()) {
 
  356     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  357     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
 
  358     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
 
  363     const char* data = dataFile.constData();
 
  364     while(i < stringEnd && currIndex < index) {
 
  365         i += strlen(data + i) + 1;
 
  369     return i18nc(
"KCharselect unicode section name", data + i);
 
  374     if(!openDataFile()) {
 
  377     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  378     const int detailIndex = getDetailIndex(c);
 
  379     if(detailIndex == 0) {
 
  383     const quint8 count = * (quint8 *)(udata + detailIndex + 6);
 
  384     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
 
  388     const char* data = dataFile.constData();
 
  389     for (
int i = 0;  i < count;  i++) {
 
  391         offset += strlen(data + offset) + 1;
 
  398     if(!openDataFile()) {
 
  401     const int detailIndex = getDetailIndex(c);
 
  402     if(detailIndex == 0) {
 
  406     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  407     const quint8 count = * (quint8 *)(udata + detailIndex + 11);
 
  408     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
 
  412     const char* data = dataFile.constData();
 
  413     for (
int i = 0;  i < count;  i++) {
 
  415         offset += strlen(data + offset) + 1;
 
  423     if(!openDataFile()) {
 
  426     const int detailIndex = getDetailIndex(c);
 
  427     if(detailIndex == 0) {
 
  431     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  432     const quint8 count = * (quint8 *)(udata + detailIndex + 26);
 
  433     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
 
  437     for (
int i = 0;  i < count;  i++) {
 
  438         seeAlso.
append(qFromLittleEndian<quint16> (udata + offset));
 
  447     if(!openDataFile()) {
 
  450     const int detailIndex = getDetailIndex(c);
 
  451     if(detailIndex == 0) {
 
  455     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  456     const quint8 count = * (quint8 *)(udata + detailIndex + 21);
 
  457     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
 
  461     const char* data = dataFile.constData();
 
  462     for (
int i = 0;  i < count;  i++) {
 
  464         offset += strlen(data + offset) + 1;
 
  472     if(!openDataFile()) {
 
  475     const int detailIndex = getDetailIndex(c);
 
  476     if(detailIndex == 0) {
 
  480     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  481     const quint8 count = * (quint8 *)(udata + detailIndex + 16);
 
  482     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
 
  486     const char* data = dataFile.constData();
 
  487     for (
int i = 0;  i < count;  i++) {
 
  489         offset += strlen(data + offset) + 1;
 
  492     return approxEquivalents;
 
  497     if(!openDataFile()) {
 
  501     const char* data = dataFile.constData();
 
  502     const uchar* udata = 
reinterpret_cast<const uchar*
>(data);
 
  503     const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
 
  504     const quint32 offsetEnd = dataFile.size();
 
  508     int max = ((offsetEnd - offsetBegin) / 30) - 1;
 
  512         mid = (min + max) / 2;
 
  513         const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
 
  514         if (unicode > midUnicode)
 
  516         else if (unicode < midUnicode)
 
  520             for(
int i = 0; i < 7; i++) {
 
  521                 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
 
  535 QChar::Category KCharSelectData::category(
const QChar& c)
 
  537     if(!openDataFile()) {
 
  543     const uchar* data = 
reinterpret_cast<const uchar*
>(dataFile.constData());
 
  544     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
 
  545     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
 
  549     int max = ((offsetEnd - offsetBegin) / 6) - 1;
 
  553         mid = (min + max) / 2;
 
  554         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
 
  555         if (unicode > midUnicode)
 
  557         else if (unicode < midUnicode)
 
  560             quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
 
  561             const quint8 categoryCode = * (quint8 *)(data + offset);
 
  562             return QChar::Category(categoryCode);
 
  569 bool KCharSelectData::isPrint(
const QChar& c)
 
  571     QChar::Category cat = category(c);
 
  572     return !(cat == QChar::Other_Control || cat == QChar::Other_NotAssigned);
 
  575 bool KCharSelectData::isDisplayable(
const QChar& c)
 
  580     if(c == 0xFDD0 || c == 0xFDD1)
 
  583     return !isIgnorable(c) && isPrint(c);
 
  586 bool KCharSelectData::isIgnorable(
const QChar& c)
 
  603     return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 ||
 
  604            c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) ||
 
  605            (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) ||
 
  606            (c >= 0x2060 && c <= 0x206F) || c == 0x3164 ||
 
  607            (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 ||
 
  608            (c >= 0xFFF0 && c <= 0xFFF8);
 
  611 bool KCharSelectData::isCombining(
const QChar &c)
 
  613     return section(c) == 
i18nc(
"KCharSelect section name", 
"Combining Diacritical Marks");
 
  621     if (!isDisplayable(c)) {
 
  622         return QString(
"<b>") + 
i18n(
"Non-printable") + 
"</b>";
 
  625         if (isCombining(c)) {
 
  626             s += displayCombining(c);
 
  635 QString KCharSelectData::displayCombining(
const QChar &c)
 
  652 QString KCharSelectData::categoryText(QChar::Category category)
 
  655     case QChar::Other_Control: 
return i18n(
"Other, Control");
 
  656     case QChar::Other_Format: 
return i18n(
"Other, Format");
 
  657     case QChar::Other_NotAssigned: 
return i18n(
"Other, Not Assigned");
 
  658     case QChar::Other_PrivateUse: 
return i18n(
"Other, Private Use");
 
  659     case QChar::Other_Surrogate: 
return i18n(
"Other, Surrogate");
 
  660     case QChar::Letter_Lowercase: 
return i18n(
"Letter, Lowercase");
 
  661     case QChar::Letter_Modifier: 
return i18n(
"Letter, Modifier");
 
  662     case QChar::Letter_Other: 
return i18n(
"Letter, Other");
 
  663     case QChar::Letter_Titlecase: 
return i18n(
"Letter, Titlecase");
 
  664     case QChar::Letter_Uppercase: 
return i18n(
"Letter, Uppercase");
 
  665     case QChar::Mark_SpacingCombining: 
return i18n(
"Mark, Spacing Combining");
 
  666     case QChar::Mark_Enclosing: 
return i18n(
"Mark, Enclosing");
 
  667     case QChar::Mark_NonSpacing: 
return i18n(
"Mark, Non-Spacing");
 
  668     case QChar::Number_DecimalDigit: 
return i18n(
"Number, Decimal Digit");
 
  669     case QChar::Number_Letter: 
return i18n(
"Number, Letter");
 
  670     case QChar::Number_Other: 
return i18n(
"Number, Other");
 
  671     case QChar::Punctuation_Connector: 
return i18n(
"Punctuation, Connector");
 
  672     case QChar::Punctuation_Dash: 
return i18n(
"Punctuation, Dash");
 
  673     case QChar::Punctuation_Close: 
return i18n(
"Punctuation, Close");
 
  674     case QChar::Punctuation_FinalQuote: 
return i18n(
"Punctuation, Final Quote");
 
  675     case QChar::Punctuation_InitialQuote: 
return i18n(
"Punctuation, Initial Quote");
 
  676     case QChar::Punctuation_Other: 
return i18n(
"Punctuation, Other");
 
  677     case QChar::Punctuation_Open: 
return i18n(
"Punctuation, Open");
 
  678     case QChar::Symbol_Currency: 
return i18n(
"Symbol, Currency");
 
  679     case QChar::Symbol_Modifier: 
return i18n(
"Symbol, Modifier");
 
  680     case QChar::Symbol_Math: 
return i18n(
"Symbol, Math");
 
  681     case QChar::Symbol_Other: 
return i18n(
"Symbol, Other");
 
  682     case QChar::Separator_Line: 
return i18n(
"Separator, Line");
 
  683     case QChar::Separator_Paragraph: 
return i18n(
"Separator, Paragraph");
 
  684     case QChar::Separator_Space: 
return i18n(
"Separator, Space");
 
  685     default: 
return i18n(
"Unknown");
 
  697     if(simplified.
length() == 1) {
 
  702     if (searchStrings.
count() == 0) {
 
  706     QRegExp regExp(
"^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
 
  707     foreach(
const QString &s, searchStrings) {
 
  708         if(regExp.exactMatch(s)) {
 
  709             returnRes.
append(regExp.cap(2).toInt(0, 16));
 
  712                 searchStrings[searchStrings.
indexOf(s)] = regExp.cap(2);
 
  717         int unicode = s.
toInt(&ok);
 
  718         if (ok && unicode >= 0 && unicode <= 0xFFFF) {
 
  719             returnRes.
append(unicode);
 
  723     bool firstSubString = 
true;
 
  724     foreach(
const QString &s, searchStrings) {
 
  726         if (firstSubString) {
 
  728             firstSubString = 
false;
 
  736     foreach(
const QChar &c, returnRes) {
 
  743     foreach(
const quint16 &c, sortedResult) {
 
  752     futureIndex.waitForFinished();
 
  753     const Index index = futureIndex;
 
  754     Index::const_iterator pos = index.lowerBound(s);
 
  757     while (pos != index.constEnd() && pos.key().startsWith(s)) {
 
  758         foreach (
const quint16 &c, pos.value()) {
 
  773     while (end < length) {
 
  774         while (end < length && (s[end].isLetterOrNumber() || s[end] == 
'+')) {
 
  778             result.
append(s.
mid(start, end - start));
 
  781         while (end < length && !(s[end].isLetterOrNumber() || s[end] == 
'+')) {
 
  789 void KCharSelectData::appendToIndex(Index *index, quint16 unicode, 
const QString& s)
 
  792     foreach(
const QString &s, strings) {
 
  793         (*index)[s.
toLower()].append(unicode);
 
  797 Index KCharSelectData::createIndex(
const QByteArray& dataFile)
 
  802     const uchar* udata = 
reinterpret_cast<const uchar*
>(dataFile.
constData());
 
  804     const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4);
 
  805     const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8);
 
  807     int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
 
  809     for (
int pos = 0; pos <= max; pos++) {
 
  810         const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6);
 
  811         quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2);
 
  812         appendToIndex(&i, unicode, 
QString(data + offset + 1));
 
  816     const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12);
 
  817     const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16);
 
  819     max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
 
  821     for (
int pos = 0; pos <= max; pos++) {
 
  822         const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27);
 
  825         const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6);
 
  826         quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2);
 
  828         for (
int j = 0;  j < aliasCount;  j++) {
 
  830             aliasOffset += strlen(data + aliasOffset) + 1;
 
  834         const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11);
 
  835         quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7);
 
  837         for (
int j = 0;  j < notesCount;  j++) {
 
  839             notesOffset += strlen(data + notesOffset) + 1;
 
  843         const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16);
 
  844         quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12);
 
  846         for (
int j = 0;  j < apprCount;  j++) {
 
  848             apprOffset += strlen(data + apprOffset) + 1;
 
  852         const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21);
 
  853         quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17);
 
  855         for (
int j = 0;  j < equivCount;  j++) {
 
  857             equivOffset += strlen(data + equivOffset) + 1;
 
  861         const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26);
 
  862         quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22);
 
  864         for (
int j = 0;  j < seeAlsoCount;  j++) {
 
  865             quint16 seeAlso = qFromLittleEndian<quint16> (udata + seeAlsoOffset);
 
  866             appendToIndex(&i, unicode, formatCode(seeAlso, 4, 
QString()));
 
  867             equivOffset += strlen(data + equivOffset) + 1;
 
QString i18n(const char *text)
static const char JAMO_V_TABLE[][4]
static const char JAMO_T_TABLE[][4]
static const char JAMO_L_TABLE[][4]
QString & prepend(QChar ch)
static QString locate(const char *type, const QString &filename, const KComponentData &cData=KGlobal::mainComponent())
QString simplified() const
const char * name(StandardAction id)
This will return the internal name of a given standard action. 
KAction * find(const QObject *recvr, const char *slot, QObject *parent)
Initiate a 'find' request in the current document. 
const_iterator insert(const T &value)
Category category() const
QString i18nc(const char *ctxt, const char *text)
QString number(int n, int base)
int count(const T &value) const
void append(const T &value)
int toInt(bool *ok, int base) const
const char * constData() const
QFuture< T > run(Function function,...)
KGuiItem ok()
Returns the 'Ok' gui item. 
QString mid(int position, int n) const
bool remove(const T &value)
QSet< T > & intersect(const QSet< T > &other)
const QChar at(int position) const
QList< T > toList() const
QString fromLatin1(const char *str, int size)
int indexOf(const QRegExp &rx, int from) const
const KShortcut & end()
Goto end of the document.