• Skip to content
  • Skip to link menu
KDE 4.7 API Reference
  • KDE API Reference
  • kdelibs
  • KDE Home
  • Contact Us
 

KDEUI

kcharselectdata.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002 
00003    Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de>
00004 
00005    This library is free software; you can redistribute it and/or
00006    modify it under the terms of the GNU Library General Public
00007    License as published by the Free Software Foundation; either
00008    version 2 of the License, or (at your option) any later version.
00009 
00010    This library is distributed in the hope that it will be useful,
00011    but WITHOUT ANY WARRANTY; without even the implied warranty of
00012    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013    Library General Public License for more details.
00014 
00015    You should have received a copy of the GNU Library General Public License
00016    along with this library; see the file COPYING.LIB.  If not, write to
00017    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018    Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include "kcharselectdata_p.h"
00022 
00023 #include <QStringList>
00024 #include <QFile>
00025 #include <qendian.h>
00026 #include <QtConcurrentRun>
00027 
00028 #include <string.h>
00029 #include <klocalizedstring.h>
00030 #include <kstandarddirs.h>
00031 
/*
 * Constants for hangul (de)composition, see UAX #15.
 *
 * These are typed, file-local constants rather than preprocessor macros so
 * that they obey normal C++ scoping and cannot rewrite unrelated identifiers.
 */
static const int SBase = 0xAC00;
static const int LBase = 0x1100;
static const int VBase = 0x1161;
static const int TBase = 0x11A7;
static const int LCount = 19;
static const int VCount = 21;
static const int TCount = 28;
static const int NCount = VCount * TCount;
static const int SCount = LCount * NCount;

/*
 * Name fragments used to build hangul syllable names. Each table is indexed
 * by the L, V or T index computed from a syllable's code point. Every entry
 * fits in 4 bytes including the terminating NUL.
 */

/* One entry per L index; LCount entries. Entry 11 is intentionally empty. */
static const char JAMO_L_TABLE[LCount][4] =
    {
        "G", "GG", "N", "D", "DD", "R", "M", "B", "BB",
        "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H"
    };

/* One entry per V index; VCount entries. */
static const char JAMO_V_TABLE[VCount][4] =
    {
        "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O",
        "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI",
        "YU", "EU", "YI", "I"
    };

/* One entry per T index; TCount entries. Entry 0 is intentionally empty. */
static const char JAMO_T_TABLE[TCount][4] =
    {
        "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM",
        "LB", "LS", "LT", "LP", "LH", "M", "B", "BS",
        "S", "SS", "NG", "J", "C", "K", "T", "P", "H"
    };
00062 
00063 bool KCharSelectData::openDataFile()
00064 {
00065     if(!dataFile.isEmpty()) {
00066         return true;
00067     } else {
00068         QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data"));
00069         if (!file.open(QIODevice::ReadOnly)) {
00070             return false;
00071         }
00072         dataFile = file.readAll();
00073         file.close();
00074         futureIndex = QtConcurrent::run(this, &KCharSelectData::createIndex, dataFile);
00075         return true;
00076     }
00077 }
00078 
00079 quint32 KCharSelectData::getDetailIndex(const QChar& c) const
00080 {
00081     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00082     // Convert from little-endian, so that this code works on PPC too.
00083     // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286
00084     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12);
00085     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16);
00086 
00087     int min = 0;
00088     int mid;
00089     int max = ((offsetEnd - offsetBegin) / 27) - 1;
00090 
00091     quint16 unicode = c.unicode();
00092 
00093     static quint16 most_recent_searched;
00094     static quint32 most_recent_result;
00095 
00096 
00097     if (unicode == most_recent_searched)
00098         return most_recent_result;
00099 
00100     most_recent_searched = unicode;
00101 
00102     while (max >= min) {
00103         mid = (min + max) / 2;
00104         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27);
00105         if (unicode > midUnicode)
00106             min = mid + 1;
00107         else if (unicode < midUnicode)
00108             max = mid - 1;
00109         else {
00110             most_recent_result = offsetBegin + mid*27;
00111 
00112             return most_recent_result;
00113         }
00114     }
00115 
00116     most_recent_result = 0;
00117     return 0;
00118 }
00119 
00120 QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base)
00121 {
00122     QString s = QString::number(code, base).toUpper();
00123     while (s.size() < length)
00124         s.prepend('0');
00125     s.prepend(prefix);
00126     return s;
00127 }
00128 
00129 QList<QChar> KCharSelectData::blockContents(int block)
00130 {
00131     if(!openDataFile()) {
00132         return QList<QChar>();
00133     }
00134 
00135     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00136     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00137     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00138 
00139     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00140 
00141     QList<QChar> res;
00142 
00143     if(block > max)
00144         return res;
00145 
00146     quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4);
00147     quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2);
00148 
00149     while(unicodeBegin < unicodeEnd) {
00150         res.append(unicodeBegin);
00151         unicodeBegin++;
00152     }
00153     res.append(unicodeBegin); // Be carefull when unicodeEnd==0xffff
00154 
00155     return res;
00156 }
00157 
00158 QList<int> KCharSelectData::sectionContents(int section)
00159 {
00160     if(!openDataFile()) {
00161         return QList<int>();
00162     }
00163 
00164     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00165     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00166     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00167 
00168     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00169 
00170     QList<int> res;
00171 
00172     if(section > max)
00173         return res;
00174 
00175     for(int i = 0; i <= max; i++) {
00176         const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00177         if(currSection == section) {
00178             res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) );
00179         }
00180     }
00181 
00182     return res;
00183 }
00184 
00185 QStringList KCharSelectData::sectionList()
00186 {
00187     if(!openDataFile()) {
00188         return QStringList();
00189     }
00190 
00191     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00192     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
00193     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
00194 
00195     const char* data = dataFile.constData();
00196     QStringList list;
00197     quint32 i = stringBegin;
00198     while(i < stringEnd) {
00199         list.append(i18nc("KCharSelect section name", data + i));
00200         i += strlen(data + i) + 1;
00201     }
00202 
00203     return list;
00204 }
00205 
00206 QString KCharSelectData::block(const QChar& c)
00207 {
00208     return blockName(blockIndex(c));
00209 }
00210 
00211 QString KCharSelectData::section(const QChar& c)
00212 {
00213     return sectionName(sectionIndex(blockIndex(c)));
00214 }
00215 
00216 QString KCharSelectData::name(const QChar& c)
00217 {
00218     if(!openDataFile()) {
00219         return QString();
00220     }
00221 
00222     ushort unicode = c.unicode();
00223     if ((unicode >= 0x3400 && unicode <= 0x4DB5)
00224             || (unicode >= 0x4e00 && unicode <= 0x9fa5)) {
00225         // || (unicode >= 0x20000 && unicode <= 0x2A6D6) // useless, since limited to 16 bit
00226         return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16);
00227     } else if (c >= 0xac00 && c <= 0xd7af) {
00228         /* compute hangul syllable name as per UAX #15 */
00229         int SIndex = c.unicode() - SBase;
00230         int LIndex, VIndex, TIndex;
00231 
00232         if (SIndex < 0 || SIndex >= SCount)
00233             return QString();
00234 
00235         LIndex = SIndex / NCount;
00236         VIndex = (SIndex % NCount) / TCount;
00237         TIndex = SIndex % TCount;
00238 
00239         return QLatin1String("HANGUL SYLLABLE ") + QLatin1String(JAMO_L_TABLE[LIndex])
00240             + QLatin1String(JAMO_V_TABLE[VIndex]) + QLatin1String(JAMO_T_TABLE[TIndex]);
00241     } else if (unicode >= 0xD800 && unicode <= 0xDB7F)
00242         return i18n("<Non Private Use High Surrogate>");
00243     else if (unicode >= 0xDB80 && unicode <= 0xDBFF)
00244         return i18n("<Private Use High Surrogate>");
00245     else if (unicode >= 0xDC00 && unicode <= 0xDFFF)
00246         return i18n("<Low Surrogate>");
00247     else if (unicode >= 0xE000 && unicode <= 0xF8FF)
00248         return i18n("<Private Use>");
00249 //  else if (unicode >= 0xF0000 && unicode <= 0xFFFFD) // 16 bit!
00250 //   return i18n("<Plane 15 Private Use>");
00251 //  else if (unicode >= 0x100000 && unicode <= 0x10FFFD)
00252 //   return i18n("<Plane 16 Private Use>");
00253     else {
00254         const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00255         const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
00256         const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
00257 
00258         int min = 0;
00259         int mid;
00260         int max = ((offsetEnd - offsetBegin) / 6) - 1;
00261         QString s;
00262 
00263         while (max >= min) {
00264             mid = (min + max) / 2;
00265             const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
00266             if (unicode > midUnicode)
00267                 min = mid + 1;
00268             else if (unicode < midUnicode)
00269                 max = mid - 1;
00270             else {
00271                 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
00272                 s = QString(dataFile.constData() + offset + 1);
00273                 break;
00274             }
00275         }
00276 
00277         if (s.isNull()) {
00278             return i18n("<not assigned>");
00279         } else {
00280             return s;
00281         }
00282     }
00283 }
00284 
00285 int KCharSelectData::blockIndex(const QChar& c)
00286 {
00287     if(!openDataFile()) {
00288         return 0;
00289     }
00290 
00291     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00292     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20);
00293     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24);
00294     const quint16 unicode = c.unicode();
00295 
00296     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00297 
00298     int i = 0;
00299 
00300     while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) {
00301         i++;
00302     }
00303 
00304     return i;
00305 }
00306 
00307 int KCharSelectData::sectionIndex(int block)
00308 {
00309     if(!openDataFile()) {
00310         return 0;
00311     }
00312 
00313     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00314     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28);
00315     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32);
00316 
00317     int max = ((offsetEnd - offsetBegin) / 4) - 1;
00318 
00319     for(int i = 0; i <= max; i++) {
00320         if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) {
00321             return qFromLittleEndian<quint16>(data + offsetBegin + i*4);
00322         }
00323     }
00324 
00325     return 0;
00326 }
00327 
00328 QString KCharSelectData::blockName(int index)
00329 {
00330     if(!openDataFile()) {
00331         return QString();
00332     }
00333 
00334     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00335     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16);
00336     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20);
00337 
00338     quint32 i = stringBegin;
00339     int currIndex = 0;
00340 
00341     const char* data = dataFile.constData();
00342     while(i < stringEnd && currIndex < index) {
00343         i += strlen(data + i) + 1;
00344         currIndex++;
00345     }
00346 
00347     return i18nc("KCharselect unicode block name", data + i);
00348 }
00349 
00350 QString KCharSelectData::sectionName(int index)
00351 {
00352     if(!openDataFile()) {
00353         return QString();
00354     }
00355 
00356     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00357     const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24);
00358     const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28);
00359 
00360     quint32 i = stringBegin;
00361     int currIndex = 0;
00362 
00363     const char* data = dataFile.constData();
00364     while(i < stringEnd && currIndex < index) {
00365         i += strlen(data + i) + 1;
00366         currIndex++;
00367     }
00368 
00369     return i18nc("KCharselect unicode section name", data + i);
00370 }
00371 
00372 QStringList KCharSelectData::aliases(const QChar& c)
00373 {
00374     if(!openDataFile()) {
00375         return QStringList();
00376     }
00377     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00378     const int detailIndex = getDetailIndex(c);
00379     if(detailIndex == 0) {
00380         return QStringList();
00381     }
00382 
00383     const quint8 count = * (quint8 *)(udata + detailIndex + 6);
00384     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2);
00385 
00386     QStringList aliases;
00387 
00388     const char* data = dataFile.constData();
00389     for (int i = 0;  i < count;  i++) {
00390         aliases.append(QString::fromLatin1(data + offset));
00391         offset += strlen(data + offset) + 1;
00392     }
00393     return aliases;
00394 }
00395 
00396 QStringList KCharSelectData::notes(const QChar& c)
00397 {
00398     if(!openDataFile()) {
00399         return QStringList();
00400     }
00401     const int detailIndex = getDetailIndex(c);
00402     if(detailIndex == 0) {
00403         return QStringList();
00404     }
00405 
00406     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00407     const quint8 count = * (quint8 *)(udata + detailIndex + 11);
00408     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7);
00409 
00410     QStringList notes;
00411 
00412     const char* data = dataFile.constData();
00413     for (int i = 0;  i < count;  i++) {
00414         notes.append(QString::fromLatin1(data + offset));
00415         offset += strlen(data + offset) + 1;
00416     }
00417 
00418     return notes;
00419 }
00420 
00421 QList<QChar> KCharSelectData::seeAlso(const QChar& c)
00422 {
00423     if(!openDataFile()) {
00424         return QList<QChar>();
00425     }
00426     const int detailIndex = getDetailIndex(c);
00427     if(detailIndex == 0) {
00428         return QList<QChar>();
00429     }
00430 
00431     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00432     const quint8 count = * (quint8 *)(udata + detailIndex + 26);
00433     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22);
00434 
00435     QList<QChar> seeAlso;
00436 
00437     for (int i = 0;  i < count;  i++) {
00438         seeAlso.append(qFromLittleEndian<quint16> (udata + offset));
00439         offset += 2;
00440     }
00441 
00442     return seeAlso;
00443 }
00444 
00445 QStringList KCharSelectData::equivalents(const QChar& c)
00446 {
00447     if(!openDataFile()) {
00448         return QStringList();
00449     }
00450     const int detailIndex = getDetailIndex(c);
00451     if(detailIndex == 0) {
00452         return QStringList();
00453     }
00454 
00455     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00456     const quint8 count = * (quint8 *)(udata + detailIndex + 21);
00457     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17);
00458 
00459     QStringList equivalents;
00460 
00461     const char* data = dataFile.constData();
00462     for (int i = 0;  i < count;  i++) {
00463         equivalents.append(QString::fromLatin1(data + offset));
00464         offset += strlen(data + offset) + 1;
00465     }
00466 
00467     return equivalents;
00468 }
00469 
00470 QStringList KCharSelectData::approximateEquivalents(const QChar& c)
00471 {
00472     if(!openDataFile()) {
00473         return QStringList();
00474     }
00475     const int detailIndex = getDetailIndex(c);
00476     if(detailIndex == 0) {
00477         return QStringList();
00478     }
00479 
00480     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00481     const quint8 count = * (quint8 *)(udata + detailIndex + 16);
00482     quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12);
00483 
00484     QStringList approxEquivalents;
00485 
00486     const char* data = dataFile.constData();
00487     for (int i = 0;  i < count;  i++) {
00488         approxEquivalents.append(QString::fromLatin1(data + offset));
00489         offset += strlen(data + offset) + 1;
00490     }
00491 
00492     return approxEquivalents;
00493 }
00494 
00495 QStringList KCharSelectData::unihanInfo(const QChar& c)
00496 {
00497     if(!openDataFile()) {
00498         return QStringList();
00499     }
00500 
00501     const char* data = dataFile.constData();
00502     const uchar* udata = reinterpret_cast<const uchar*>(data);
00503     const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36);
00504     const quint32 offsetEnd = dataFile.size();
00505 
00506     int min = 0;
00507     int mid;
00508     int max = ((offsetEnd - offsetBegin) / 30) - 1;
00509     quint16 unicode = c.unicode();
00510 
00511     while (max >= min) {
00512         mid = (min + max) / 2;
00513         const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30);
00514         if (unicode > midUnicode)
00515             min = mid + 1;
00516         else if (unicode < midUnicode)
00517             max = mid - 1;
00518         else {
00519             QStringList res;
00520             for(int i = 0; i < 7; i++) {
00521                 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4);
00522                 if(offset != 0) {
00523                     res.append(QString::fromLatin1(data + offset));
00524                 } else {
00525                     res.append(QString());
00526                 }
00527             }
00528             return res;
00529         }
00530     }
00531 
00532     return QStringList();
00533 }
00534 
00535 QChar::Category KCharSelectData::category(const QChar& c)
00536 {
00537     if(!openDataFile()) {
00538         return c.category();
00539     }
00540 
00541     ushort unicode = c.unicode();
00542 
00543     const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData());
00544     const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4);
00545     const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8);
00546 
00547     int min = 0;
00548     int mid;
00549     int max = ((offsetEnd - offsetBegin) / 6) - 1;
00550     QString s;
00551 
00552     while (max >= min) {
00553         mid = (min + max) / 2;
00554         const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6);
00555         if (unicode > midUnicode)
00556             min = mid + 1;
00557         else if (unicode < midUnicode)
00558             max = mid - 1;
00559         else {
00560             quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2);
00561             const quint8 categoryCode = * (quint8 *)(data + offset);
00562             return QChar::Category(categoryCode);
00563         }
00564     }
00565 
00566     return c.category();
00567 }
00568 
00569 bool KCharSelectData::isPrint(const QChar& c)
00570 {
00571     QChar::Category cat = category(c);
00572     return !(cat == QChar::Other_Control || cat == QChar::Other_NotAssigned);
00573 }
00574 
00575 bool KCharSelectData::isDisplayable(const QChar& c)
00576 {
00577     // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end of frames.
00578     // They should be seen as non-printable characters, as trying to display them leads
00579     //  to a crash caused by a Qt "noBlockInString" assertion.
00580     if(c == 0xFDD0 || c == 0xFDD1)
00581         return false;
00582 
00583     return !isIgnorable(c) && isPrint(c);
00584 }
00585 
00586 bool KCharSelectData::isIgnorable(const QChar& c)
00587 {
00588     /*
00589      * According to the Unicode standard, Default Ignorable Code Points
00590      * should be ignored unless explicitly supported. For example, U+202E
00591      * RIGHT-TO-LEFT-OVERRIDE ir printable according to Qt, but displaying
00592      * it gives the undesired effect of all text being turned RTL. We do not
00593      * have a way to "explicitly" support it, so we will treat it as
00594      * non-printable.
00595      *
00596      * There is a list of these on
00597      * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt under the
00598      * property Default_Ignorable_Code_Point.
00599      */
00600 
00601     //NOTE: not very nice to hardcode these here; is it worth it to modify
00602     //      the binary data file to hold them?
00603     return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 ||
00604            c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) ||
00605            (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) ||
00606            (c >= 0x2060 && c <= 0x206F) || c == 0x3164 ||
00607            (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 ||
00608            (c >= 0xFFF0 && c <= 0xFFF8);
00609 }
00610 
00611 bool KCharSelectData::isCombining(const QChar &c)
00612 {
00613     return section(c) == i18nc("KCharSelect section name", "Combining Diacritical Marks");
00614     //FIXME: this is an imperfect test. There are many combining characters 
00615     //       that are outside of this section. See Grapheme_Extend in
00616     //       http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt
00617 }
00618 
00619 QString KCharSelectData::display(const QChar &c, const QFont &font)
00620 {
00621     if (!isDisplayable(c)) {
00622         return QString("<b>") + i18n("Non-printable") + "</b>";
00623     } else {
00624         QString s = QString("<font size=\"+4\" face=\"") + font.family() + "\">";
00625         if (isCombining(c)) {
00626             s += displayCombining(c);
00627         } else {
00628             s += "&#" + QString::number(c.unicode()) + ';';
00629         }
00630         s += "</font>";
00631         return s;
00632     }
00633 }
00634 
00635 QString KCharSelectData::displayCombining(const QChar &c)
00636 {
00637     /*
00638      * The purpose of this is to make it easier to see how a combining
00639      * character affects the text around it.
00640      * The initial plan was to use U+25CC DOTTED CIRCLE for this purpose,
00641      * as seen in pdfs from Unicode, but there seem to be a lot of alignment
00642      * problems with that.
00643      *
00644      * Eventually, it would be nice to determine whether the character
00645      * combines to the left or to the right, etc.
00646      */
00647     QString s = "&nbsp;&#" + QString::number(c.unicode()) + ";&nbsp;" +
00648                 " (ab&#" + QString::number(c.unicode()) + ";c)";
00649     return s;
00650 }
00651 
00652 QString KCharSelectData::categoryText(QChar::Category category)
00653 {
00654     switch (category) {
00655     case QChar::Other_Control: return i18n("Other, Control");
00656     case QChar::Other_Format: return i18n("Other, Format");
00657     case QChar::Other_NotAssigned: return i18n("Other, Not Assigned");
00658     case QChar::Other_PrivateUse: return i18n("Other, Private Use");
00659     case QChar::Other_Surrogate: return i18n("Other, Surrogate");
00660     case QChar::Letter_Lowercase: return i18n("Letter, Lowercase");
00661     case QChar::Letter_Modifier: return i18n("Letter, Modifier");
00662     case QChar::Letter_Other: return i18n("Letter, Other");
00663     case QChar::Letter_Titlecase: return i18n("Letter, Titlecase");
00664     case QChar::Letter_Uppercase: return i18n("Letter, Uppercase");
00665     case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining");
00666     case QChar::Mark_Enclosing: return i18n("Mark, Enclosing");
00667     case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing");
00668     case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit");
00669     case QChar::Number_Letter: return i18n("Number, Letter");
00670     case QChar::Number_Other: return i18n("Number, Other");
00671     case QChar::Punctuation_Connector: return i18n("Punctuation, Connector");
00672     case QChar::Punctuation_Dash: return i18n("Punctuation, Dash");
00673     case QChar::Punctuation_Close: return i18n("Punctuation, Close");
00674     case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote");
00675     case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote");
00676     case QChar::Punctuation_Other: return i18n("Punctuation, Other");
00677     case QChar::Punctuation_Open: return i18n("Punctuation, Open");
00678     case QChar::Symbol_Currency: return i18n("Symbol, Currency");
00679     case QChar::Symbol_Modifier: return i18n("Symbol, Modifier");
00680     case QChar::Symbol_Math: return i18n("Symbol, Math");
00681     case QChar::Symbol_Other: return i18n("Symbol, Other");
00682     case QChar::Separator_Line: return i18n("Separator, Line");
00683     case QChar::Separator_Paragraph: return i18n("Separator, Paragraph");
00684     case QChar::Separator_Space: return i18n("Separator, Space");
00685     default: return i18n("Unknown");
00686     }
00687 }
00688 
00689 QList<QChar> KCharSelectData::find(const QString& needle)
00690 {
00691     QSet<quint16> result;
00692 
00693     QList<QChar> returnRes;
00694     QString simplified = needle.simplified();
00695     QStringList searchStrings = splitString(needle.simplified());
00696 
00697     if(simplified.length() == 1) {
00698         // search for hex representation of the character
00699         searchStrings = QStringList(formatCode(simplified.at(0).unicode()));
00700     }
00701 
00702     if (searchStrings.count() == 0) {
00703         return returnRes;
00704     }
00705 
00706     QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$");
00707     foreach(const QString &s, searchStrings) {
00708         if(regExp.exactMatch(s)) {
00709             returnRes.append(regExp.cap(2).toInt(0, 16));
00710             // search for "1234" instead of "0x1234"
00711             if (s.length() == 6) {
00712                 searchStrings[searchStrings.indexOf(s)] = regExp.cap(2);
00713             }
00714         }
00715         // try to parse string as decimal number
00716         bool ok;
00717         int unicode = s.toInt(&ok);
00718         if (ok && unicode >= 0 && unicode <= 0xFFFF) {
00719             returnRes.append(unicode);
00720         }
00721     }
00722 
00723     bool firstSubString = true;
00724     foreach(const QString &s, searchStrings) {
00725         QSet<quint16> partResult = getMatchingChars(s.toLower());
00726         if (firstSubString) {
00727             result = partResult;
00728             firstSubString = false;
00729         } else {
00730             result = result.intersect(partResult);
00731         }
00732     }
00733 
00734     // remove results found by matching the code point to prevent duplicate results
00735     // while letting these characters stay at the beginning
00736     foreach(const QChar &c, returnRes) {
00737         result.remove(c.unicode());
00738     }
00739 
00740     QList<quint16> sortedResult = result.toList();
00741     qSort(sortedResult);
00742 
00743     foreach(const quint16 &c, sortedResult) {
00744         returnRes.append(c);
00745     }
00746 
00747     return returnRes;
00748 }
00749 
00750 QSet<quint16> KCharSelectData::getMatchingChars(const QString& s)
00751 {
00752     futureIndex.waitForFinished();
00753     const Index index = futureIndex;
00754     Index::const_iterator pos = index.lowerBound(s);
00755     QSet<quint16> result;
00756 
00757     while (pos != index.constEnd() && pos.key().startsWith(s)) {
00758         foreach (const quint16 &c, pos.value()) {
00759             result.insert(c);
00760         }
00761         ++pos;
00762     }
00763 
00764     return result;
00765 }
00766 
00767 QStringList KCharSelectData::splitString(const QString& s)
00768 {
00769     QStringList result;
00770     int start = 0;
00771     int end = 0;
00772     int length = s.length();
00773     while (end < length) {
00774         while (end < length && (s[end].isLetterOrNumber() || s[end] == '+')) {
00775             end++;
00776         }
00777         if (start != end) {
00778             result.append(s.mid(start, end - start));
00779         }
00780         start = end;
00781         while (end < length && !(s[end].isLetterOrNumber() || s[end] == '+')) {
00782             end++;
00783             start++;
00784         }
00785     }
00786     return result;
00787 }
00788 
00789 void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString& s)
00790 {
00791     const QStringList strings = splitString(s);
00792     foreach(const QString &s, strings) {
00793         (*index)[s.toLower()].append(unicode);
00794     }
00795 }
00796 
00797 Index KCharSelectData::createIndex(const QByteArray& dataFile)
00798 {
00799     Index i;
00800 
00801     // character names
00802     const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData());
00803     const char* data = dataFile.constData();
00804     const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4);
00805     const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8);
00806 
00807     int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1;
00808 
00809     for (int pos = 0; pos <= max; pos++) {
00810         const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6);
00811         quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2);
00812         appendToIndex(&i, unicode, QString(data + offset + 1));
00813     }
00814 
00815     // details
00816     const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12);
00817     const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16);
00818 
00819     max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1;
00820 
00821     for (int pos = 0; pos <= max; pos++) {
00822         const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27);
00823 
00824         // aliases
00825         const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6);
00826         quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2);
00827 
00828         for (int j = 0;  j < aliasCount;  j++) {
00829             appendToIndex(&i, unicode, QString::fromLatin1(data + aliasOffset));
00830             aliasOffset += strlen(data + aliasOffset) + 1;
00831         }
00832 
00833         // notes
00834         const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11);
00835         quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7);
00836 
00837         for (int j = 0;  j < notesCount;  j++) {
00838             appendToIndex(&i, unicode, QString::fromLatin1(data + notesOffset));
00839             notesOffset += strlen(data + notesOffset) + 1;
00840         }
00841 
00842         // approximate equivalents
00843         const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16);
00844         quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12);
00845 
00846         for (int j = 0;  j < apprCount;  j++) {
00847             appendToIndex(&i, unicode, QString::fromLatin1(data + apprOffset));
00848             apprOffset += strlen(data + apprOffset) + 1;
00849         }
00850 
00851         // equivalents
00852         const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21);
00853         quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17);
00854 
00855         for (int j = 0;  j < equivCount;  j++) {
00856             appendToIndex(&i, unicode, QString::fromLatin1(data + equivOffset));
00857             equivOffset += strlen(data + equivOffset) + 1;
00858         }
00859 
00860         // see also - convert to string (hex)
00861         const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26);
00862         quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22);
00863 
00864         for (int j = 0;  j < seeAlsoCount;  j++) {
00865             quint16 seeAlso = qFromLittleEndian<quint16> (udata + seeAlsoOffset);
00866             appendToIndex(&i, unicode, formatCode(seeAlso, 4, QString()));
00867             equivOffset += strlen(data + equivOffset) + 1;
00868         }
00869     }
00870 
00871     // unihan data
00872     // temporary disabled due to the huge amount of data
00873 //     const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36);
00874 //     const quint32 unihanOffsetEnd = dataFile.size();
00875 //     max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1;
00876 //
00877 //     for (int pos = 0; pos <= max; pos++) {
00878 //         const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30);
00879 //         for(int j = 0; j < 7; j++) {
00880 //             quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4);
00881 //             if(offset != 0) {
00882 //                 appendToIndex(&i, unicode, QString::fromUtf8(data + offset));
00883 //             }
00884 //         }
00885 //     }
00886 
00887     return i;
00888 }

KDEUI

Skip menu "KDEUI"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.7.5
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal