KDEUI
kcharselectdata.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries 00002 00003 Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de> 00004 00005 This library is free software; you can redistribute it and/or 00006 modify it under the terms of the GNU Library General Public 00007 License as published by the Free Software Foundation; either 00008 version 2 of the License, or (at your option) any later version. 00009 00010 This library is distributed in the hope that it will be useful, 00011 but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 Library General Public License for more details. 00014 00015 You should have received a copy of the GNU Library General Public License 00016 along with this library; see the file COPYING.LIB. If not, write to 00017 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00018 Boston, MA 02110-1301, USA. 00019 */ 00020 00021 #include "kcharselectdata_p.h" 00022 00023 #include <QStringList> 00024 #include <QFile> 00025 #include <qendian.h> 00026 #include <QtConcurrentRun> 00027 00028 #include <string.h> 00029 #include <klocalizedstring.h> 00030 #include <kstandarddirs.h> 00031 00032 /* constants for hangul (de)composition, see UAX #15 */ 00033 #define SBase 0xAC00 00034 #define LBase 0x1100 00035 #define VBase 0x1161 00036 #define TBase 0x11A7 00037 #define LCount 19 00038 #define VCount 21 00039 #define TCount 28 00040 #define NCount (VCount * TCount) 00041 #define SCount (LCount * NCount) 00042 00043 static const char JAMO_L_TABLE[][4] = 00044 { 00045 "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", 00046 "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H" 00047 }; 00048 00049 static const char JAMO_V_TABLE[][4] = 00050 { 00051 "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", 00052 "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI", 00053 "YU", "EU", "YI", "I" 00054 }; 00055 00056 static const char JAMO_T_TABLE[][4] = 00057 { 00058 "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", 00059 "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", 00060 "S", "SS", "NG", "J", "C", "K", "T", "P", "H" 00061 }; 00062 00063 bool KCharSelectData::openDataFile() 00064 { 00065 if(!dataFile.isEmpty()) { 00066 return true; 00067 } else { 00068 QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data")); 00069 if (!file.open(QIODevice::ReadOnly)) { 00070 return false; 00071 } 00072 dataFile = file.readAll(); 00073 file.close(); 00074 futureIndex = QtConcurrent::run(this, &KCharSelectData::createIndex, dataFile); 00075 return true; 00076 } 00077 } 00078 00079 quint32 KCharSelectData::getDetailIndex(const QChar& c) const 00080 { 00081 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00082 // Convert from little-endian, so that this code works on PPC too. 00083 // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286 00084 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12); 00085 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16); 00086 00087 int min = 0; 00088 int mid; 00089 int max = ((offsetEnd - offsetBegin) / 27) - 1; 00090 00091 quint16 unicode = c.unicode(); 00092 00093 static quint16 most_recent_searched; 00094 static quint32 most_recent_result; 00095 00096 00097 if (unicode == most_recent_searched) 00098 return most_recent_result; 00099 00100 most_recent_searched = unicode; 00101 00102 while (max >= min) { 00103 mid = (min + max) / 2; 00104 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27); 00105 if (unicode > midUnicode) 00106 min = mid + 1; 00107 else if (unicode < midUnicode) 00108 max = mid - 1; 00109 else { 00110 most_recent_result = offsetBegin + mid*27; 00111 00112 return most_recent_result; 00113 } 00114 } 00115 00116 most_recent_result = 0; 00117 return 0; 00118 } 00119 00120 QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base) 00121 { 00122 QString s = QString::number(code, base).toUpper(); 00123 while (s.size() < length) 00124 s.prepend('0'); 00125 s.prepend(prefix); 00126 return s; 00127 } 00128 00129 QList<QChar> KCharSelectData::blockContents(int block) 00130 { 00131 if(!openDataFile()) { 00132 return QList<QChar>(); 00133 } 00134 00135 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00136 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20); 00137 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24); 00138 00139 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00140 00141 QList<QChar> res; 00142 00143 if(block > max) 00144 return res; 00145 00146 quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4); 00147 quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2); 00148 00149 while(unicodeBegin < unicodeEnd) { 00150 res.append(unicodeBegin); 00151 unicodeBegin++; 00152 } 00153 res.append(unicodeBegin); // Be carefull when unicodeEnd==0xffff 00154 00155 return res; 00156 } 00157 00158 QList<int> KCharSelectData::sectionContents(int section) 00159 { 00160 if(!openDataFile()) { 00161 return QList<int>(); 00162 } 00163 00164 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00165 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28); 00166 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32); 00167 00168 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00169 00170 QList<int> res; 00171 00172 if(section > max) 00173 return res; 00174 00175 for(int i = 0; i <= max; i++) { 00176 const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4); 00177 if(currSection == section) { 00178 res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) ); 00179 } 00180 } 00181 00182 return res; 00183 } 00184 00185 QStringList KCharSelectData::sectionList() 00186 { 00187 if(!openDataFile()) { 00188 return QStringList(); 00189 } 00190 00191 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00192 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24); 00193 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28); 00194 00195 const char* data = dataFile.constData(); 00196 QStringList list; 00197 quint32 i = stringBegin; 00198 while(i < stringEnd) { 00199 list.append(i18nc("KCharSelect section name", data + i)); 00200 i += strlen(data + i) + 1; 00201 } 00202 00203 return list; 00204 } 00205 00206 QString KCharSelectData::block(const QChar& c) 00207 { 00208 return blockName(blockIndex(c)); 00209 } 00210 00211 QString KCharSelectData::section(const QChar& c) 00212 { 00213 return sectionName(sectionIndex(blockIndex(c))); 00214 } 00215 00216 QString KCharSelectData::name(const QChar& c) 00217 { 00218 if(!openDataFile()) { 00219 return QString(); 00220 } 00221 00222 ushort unicode = c.unicode(); 00223 if ((unicode >= 0x3400 && unicode <= 0x4DB5) 00224 || (unicode >= 0x4e00 && unicode <= 0x9fa5)) { 00225 // || (unicode >= 0x20000 && unicode <= 0x2A6D6) // useless, since limited to 16 bit 00226 return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16); 00227 } else if (c >= 0xac00 && c <= 0xd7af) { 00228 /* compute hangul syllable name as per UAX #15 */ 00229 int SIndex = c.unicode() - SBase; 00230 int LIndex, VIndex, TIndex; 00231 00232 if (SIndex < 0 || SIndex >= SCount) 00233 return QString(); 00234 00235 LIndex = SIndex / NCount; 00236 VIndex = (SIndex % NCount) / TCount; 00237 TIndex = SIndex % TCount; 00238 00239 return QLatin1String("HANGUL SYLLABLE ") + QLatin1String(JAMO_L_TABLE[LIndex]) 00240 + QLatin1String(JAMO_V_TABLE[VIndex]) + QLatin1String(JAMO_T_TABLE[TIndex]); 00241 } else if (unicode >= 0xD800 && unicode <= 0xDB7F) 00242 return i18n("<Non Private Use High Surrogate>"); 00243 else if (unicode >= 0xDB80 && unicode <= 0xDBFF) 00244 return i18n("<Private Use High Surrogate>"); 00245 else if (unicode >= 0xDC00 && unicode <= 0xDFFF) 00246 return i18n("<Low Surrogate>"); 00247 else if (unicode >= 0xE000 && unicode <= 0xF8FF) 00248 return i18n("<Private Use>"); 00249 // else if (unicode >= 0xF0000 && unicode <= 0xFFFFD) // 16 bit! 00250 // return i18n("<Plane 15 Private Use>"); 00251 // else if (unicode >= 0x100000 && unicode <= 0x10FFFD) 00252 // return i18n("<Plane 16 Private Use>"); 00253 else { 00254 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00255 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4); 00256 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8); 00257 00258 int min = 0; 00259 int mid; 00260 int max = ((offsetEnd - offsetBegin) / 6) - 1; 00261 QString s; 00262 00263 while (max >= min) { 00264 mid = (min + max) / 2; 00265 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6); 00266 if (unicode > midUnicode) 00267 min = mid + 1; 00268 else if (unicode < midUnicode) 00269 max = mid - 1; 00270 else { 00271 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2); 00272 s = QString(dataFile.constData() + offset + 1); 00273 break; 00274 } 00275 } 00276 00277 if (s.isNull()) { 00278 return i18n("<not assigned>"); 00279 } else { 00280 return s; 00281 } 00282 } 00283 } 00284 00285 int KCharSelectData::blockIndex(const QChar& c) 00286 { 00287 if(!openDataFile()) { 00288 return 0; 00289 } 00290 00291 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00292 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20); 00293 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24); 00294 const quint16 unicode = c.unicode(); 00295 00296 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00297 00298 int i = 0; 00299 00300 while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) { 00301 i++; 00302 } 00303 00304 return i; 00305 } 00306 00307 int KCharSelectData::sectionIndex(int block) 00308 { 00309 if(!openDataFile()) { 00310 return 0; 00311 } 00312 00313 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00314 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28); 00315 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32); 00316 00317 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00318 00319 for(int i = 0; i <= max; i++) { 00320 if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) { 00321 return qFromLittleEndian<quint16>(data + offsetBegin + i*4); 00322 } 00323 } 00324 00325 return 0; 00326 } 00327 00328 QString KCharSelectData::blockName(int index) 00329 { 00330 if(!openDataFile()) { 00331 return QString(); 00332 } 00333 00334 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00335 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16); 00336 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20); 00337 00338 quint32 i = stringBegin; 00339 int currIndex = 0; 00340 00341 const char* data = dataFile.constData(); 00342 while(i < stringEnd && currIndex < index) { 00343 i += strlen(data + i) + 1; 00344 currIndex++; 00345 } 00346 00347 return i18nc("KCharselect unicode block name", data + i); 00348 } 00349 00350 QString KCharSelectData::sectionName(int index) 00351 { 00352 if(!openDataFile()) { 00353 return QString(); 00354 } 00355 00356 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00357 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24); 00358 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28); 00359 00360 quint32 i = stringBegin; 00361 int currIndex = 0; 00362 00363 const char* data = dataFile.constData(); 00364 while(i < stringEnd && currIndex < index) { 00365 i += strlen(data + i) + 1; 00366 currIndex++; 00367 } 00368 00369 return i18nc("KCharselect unicode section name", data + i); 00370 } 00371 00372 QStringList KCharSelectData::aliases(const QChar& c) 00373 { 00374 if(!openDataFile()) { 00375 return QStringList(); 00376 } 00377 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00378 const int detailIndex = getDetailIndex(c); 00379 if(detailIndex == 0) { 00380 return QStringList(); 00381 } 00382 00383 const quint8 count = * (quint8 *)(udata + detailIndex + 6); 00384 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2); 00385 00386 QStringList aliases; 00387 00388 const char* data = dataFile.constData(); 00389 for (int i = 0; i < count; i++) { 00390 aliases.append(QString::fromLatin1(data + offset)); 00391 offset += strlen(data + offset) + 1; 00392 } 00393 return aliases; 00394 } 00395 00396 QStringList KCharSelectData::notes(const QChar& c) 00397 { 00398 if(!openDataFile()) { 00399 return QStringList(); 00400 } 00401 const int detailIndex = getDetailIndex(c); 00402 if(detailIndex == 0) { 00403 return QStringList(); 00404 } 00405 00406 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00407 const quint8 count = * (quint8 *)(udata + detailIndex + 11); 00408 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7); 00409 00410 QStringList notes; 00411 00412 const char* data = dataFile.constData(); 00413 for (int i = 0; i < count; i++) { 00414 notes.append(QString::fromLatin1(data + offset)); 00415 offset += strlen(data + offset) + 1; 00416 } 00417 00418 return notes; 00419 } 00420 00421 QList<QChar> KCharSelectData::seeAlso(const QChar& c) 00422 { 00423 if(!openDataFile()) { 00424 return QList<QChar>(); 00425 } 00426 const int detailIndex = getDetailIndex(c); 00427 if(detailIndex == 0) { 00428 return QList<QChar>(); 00429 } 00430 00431 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00432 const quint8 count = * (quint8 *)(udata + detailIndex + 26); 00433 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22); 00434 00435 QList<QChar> seeAlso; 00436 00437 for (int i = 0; i < count; i++) { 00438 seeAlso.append(qFromLittleEndian<quint16> (udata + offset)); 00439 offset += 2; 00440 } 00441 00442 return seeAlso; 00443 } 00444 00445 QStringList KCharSelectData::equivalents(const QChar& c) 00446 { 00447 if(!openDataFile()) { 00448 return QStringList(); 00449 } 00450 const int detailIndex = getDetailIndex(c); 00451 if(detailIndex == 0) { 00452 return QStringList(); 00453 } 00454 00455 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00456 const quint8 count = * (quint8 *)(udata + detailIndex + 21); 00457 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17); 00458 00459 QStringList equivalents; 00460 00461 const char* data = dataFile.constData(); 00462 for (int i = 0; i < count; i++) { 00463 equivalents.append(QString::fromLatin1(data + offset)); 00464 offset += strlen(data + offset) + 1; 00465 } 00466 00467 return equivalents; 00468 } 00469 00470 QStringList KCharSelectData::approximateEquivalents(const QChar& c) 00471 { 00472 if(!openDataFile()) { 00473 return QStringList(); 00474 } 00475 const int detailIndex = getDetailIndex(c); 00476 if(detailIndex == 0) { 00477 return QStringList(); 00478 } 00479 00480 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00481 const quint8 count = * (quint8 *)(udata + detailIndex + 16); 00482 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12); 00483 00484 QStringList approxEquivalents; 00485 00486 const char* data = dataFile.constData(); 00487 for (int i = 0; i < count; i++) { 00488 approxEquivalents.append(QString::fromLatin1(data + offset)); 00489 offset += strlen(data + offset) + 1; 00490 } 00491 00492 return approxEquivalents; 00493 } 00494 00495 QStringList KCharSelectData::unihanInfo(const QChar& c) 00496 { 00497 if(!openDataFile()) { 00498 return QStringList(); 00499 } 00500 00501 const char* data = dataFile.constData(); 00502 const uchar* udata = reinterpret_cast<const uchar*>(data); 00503 const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36); 00504 const quint32 offsetEnd = dataFile.size(); 00505 00506 int min = 0; 00507 int mid; 00508 int max = ((offsetEnd - offsetBegin) / 30) - 1; 00509 quint16 unicode = c.unicode(); 00510 00511 while (max >= min) { 00512 mid = (min + max) / 2; 00513 const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30); 00514 if (unicode > midUnicode) 00515 min = mid + 1; 00516 else if (unicode < midUnicode) 00517 max = mid - 1; 00518 else { 00519 QStringList res; 00520 for(int i = 0; i < 7; i++) { 00521 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4); 00522 if(offset != 0) { 00523 res.append(QString::fromLatin1(data + offset)); 00524 } else { 00525 res.append(QString()); 00526 } 00527 } 00528 return res; 00529 } 00530 } 00531 00532 return QStringList(); 00533 } 00534 00535 QChar::Category KCharSelectData::category(const QChar& c) 00536 { 00537 if(!openDataFile()) { 00538 return c.category(); 00539 } 00540 00541 ushort unicode = c.unicode(); 00542 00543 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00544 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4); 00545 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8); 00546 00547 int min = 0; 00548 int mid; 00549 int max = ((offsetEnd - offsetBegin) / 6) - 1; 00550 QString s; 00551 00552 while (max >= min) { 00553 mid = (min + max) / 2; 00554 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6); 00555 if (unicode > midUnicode) 00556 min = mid + 1; 00557 else if (unicode < midUnicode) 00558 max = mid - 1; 00559 else { 00560 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2); 00561 const quint8 categoryCode = * (quint8 *)(data + offset); 00562 return QChar::Category(categoryCode); 00563 } 00564 } 00565 00566 return c.category(); 00567 } 00568 00569 bool KCharSelectData::isPrint(const QChar& c) 00570 { 00571 QChar::Category cat = category(c); 00572 return !(cat == QChar::Other_Control || cat == QChar::Other_NotAssigned); 00573 } 00574 00575 bool KCharSelectData::isDisplayable(const QChar& c) 00576 { 00577 // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end of frames. 00578 // They should be seen as non-printable characters, as trying to display them leads 00579 // to a crash caused by a Qt "noBlockInString" assertion. 00580 if(c == 0xFDD0 || c == 0xFDD1) 00581 return false; 00582 00583 return !isIgnorable(c) && isPrint(c); 00584 } 00585 00586 bool KCharSelectData::isIgnorable(const QChar& c) 00587 { 00588 /* 00589 * According to the Unicode standard, Default Ignorable Code Points 00590 * should be ignored unless explicitly supported. For example, U+202E 00591 * RIGHT-TO-LEFT-OVERRIDE ir printable according to Qt, but displaying 00592 * it gives the undesired effect of all text being turned RTL. We do not 00593 * have a way to "explicitly" support it, so we will treat it as 00594 * non-printable. 00595 * 00596 * There is a list of these on 00597 * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt under the 00598 * property Default_Ignorable_Code_Point. 00599 */ 00600 00601 //NOTE: not very nice to hardcode these here; is it worth it to modify 00602 // the binary data file to hold them? 00603 return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 || 00604 c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) || 00605 (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) || 00606 (c >= 0x2060 && c <= 0x206F) || c == 0x3164 || 00607 (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 || 00608 (c >= 0xFFF0 && c <= 0xFFF8); 00609 } 00610 00611 bool KCharSelectData::isCombining(const QChar &c) 00612 { 00613 return section(c) == i18nc("KCharSelect section name", "Combining Diacritical Marks"); 00614 //FIXME: this is an imperfect test. There are many combining characters 00615 // that are outside of this section. See Grapheme_Extend in 00616 // http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt 00617 } 00618 00619 QString KCharSelectData::display(const QChar &c, const QFont &font) 00620 { 00621 if (!isDisplayable(c)) { 00622 return QString("<b>") + i18n("Non-printable") + "</b>"; 00623 } else { 00624 QString s = QString("<font size=\"+4\" face=\"") + font.family() + "\">"; 00625 if (isCombining(c)) { 00626 s += displayCombining(c); 00627 } else { 00628 s += "&#" + QString::number(c.unicode()) + ';'; 00629 } 00630 s += "</font>"; 00631 return s; 00632 } 00633 } 00634 00635 QString KCharSelectData::displayCombining(const QChar &c) 00636 { 00637 /* 00638 * The purpose of this is to make it easier to see how a combining 00639 * character affects the text around it. 00640 * The initial plan was to use U+25CC DOTTED CIRCLE for this purpose, 00641 * as seen in pdfs from Unicode, but there seem to be a lot of alignment 00642 * problems with that. 00643 * 00644 * Eventually, it would be nice to determine whether the character 00645 * combines to the left or to the right, etc. 00646 */ 00647 QString s = " &#" + QString::number(c.unicode()) + "; " + 00648 " (ab&#" + QString::number(c.unicode()) + ";c)"; 00649 return s; 00650 } 00651 00652 QString KCharSelectData::categoryText(QChar::Category category) 00653 { 00654 switch (category) { 00655 case QChar::Other_Control: return i18n("Other, Control"); 00656 case QChar::Other_Format: return i18n("Other, Format"); 00657 case QChar::Other_NotAssigned: return i18n("Other, Not Assigned"); 00658 case QChar::Other_PrivateUse: return i18n("Other, Private Use"); 00659 case QChar::Other_Surrogate: return i18n("Other, Surrogate"); 00660 case QChar::Letter_Lowercase: return i18n("Letter, Lowercase"); 00661 case QChar::Letter_Modifier: return i18n("Letter, Modifier"); 00662 case QChar::Letter_Other: return i18n("Letter, Other"); 00663 case QChar::Letter_Titlecase: return i18n("Letter, Titlecase"); 00664 case QChar::Letter_Uppercase: return i18n("Letter, Uppercase"); 00665 case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining"); 00666 case QChar::Mark_Enclosing: return i18n("Mark, Enclosing"); 00667 case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing"); 00668 case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit"); 00669 case QChar::Number_Letter: return i18n("Number, Letter"); 00670 case QChar::Number_Other: return i18n("Number, Other"); 00671 case QChar::Punctuation_Connector: return i18n("Punctuation, Connector"); 00672 case QChar::Punctuation_Dash: return i18n("Punctuation, Dash"); 00673 case QChar::Punctuation_Close: return i18n("Punctuation, Close"); 00674 case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote"); 00675 case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote"); 00676 case QChar::Punctuation_Other: return i18n("Punctuation, Other"); 00677 case QChar::Punctuation_Open: return i18n("Punctuation, Open"); 00678 case QChar::Symbol_Currency: return i18n("Symbol, Currency"); 00679 case QChar::Symbol_Modifier: return i18n("Symbol, Modifier"); 00680 case QChar::Symbol_Math: return i18n("Symbol, Math"); 00681 case QChar::Symbol_Other: return i18n("Symbol, Other"); 00682 case QChar::Separator_Line: return i18n("Separator, Line"); 00683 case QChar::Separator_Paragraph: return i18n("Separator, Paragraph"); 00684 case QChar::Separator_Space: return i18n("Separator, Space"); 00685 default: return i18n("Unknown"); 00686 } 00687 } 00688 00689 QList<QChar> KCharSelectData::find(const QString& needle) 00690 { 00691 QSet<quint16> result; 00692 00693 QList<QChar> returnRes; 00694 QString simplified = needle.simplified(); 00695 QStringList searchStrings = splitString(needle.simplified()); 00696 00697 if(simplified.length() == 1) { 00698 // search for hex representation of the character 00699 searchStrings = QStringList(formatCode(simplified.at(0).unicode())); 00700 } 00701 00702 if (searchStrings.count() == 0) { 00703 return returnRes; 00704 } 00705 00706 QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$"); 00707 foreach(const QString &s, searchStrings) { 00708 if(regExp.exactMatch(s)) { 00709 returnRes.append(regExp.cap(2).toInt(0, 16)); 00710 // search for "1234" instead of "0x1234" 00711 if (s.length() == 6) { 00712 searchStrings[searchStrings.indexOf(s)] = regExp.cap(2); 00713 } 00714 } 00715 // try to parse string as decimal number 00716 bool ok; 00717 int unicode = s.toInt(&ok); 00718 if (ok && unicode >= 0 && unicode <= 0xFFFF) { 00719 returnRes.append(unicode); 00720 } 00721 } 00722 00723 bool firstSubString = true; 00724 foreach(const QString &s, searchStrings) { 00725 QSet<quint16> partResult = getMatchingChars(s.toLower()); 00726 if (firstSubString) { 00727 result = partResult; 00728 firstSubString = false; 00729 } else { 00730 result = result.intersect(partResult); 00731 } 00732 } 00733 00734 // remove results found by matching the code point to prevent duplicate results 00735 // while letting these characters stay at the beginning 00736 foreach(const QChar &c, returnRes) { 00737 result.remove(c.unicode()); 00738 } 00739 00740 QList<quint16> sortedResult = result.toList(); 00741 qSort(sortedResult); 00742 00743 foreach(const quint16 &c, sortedResult) { 00744 returnRes.append(c); 00745 } 00746 00747 return returnRes; 00748 } 00749 00750 QSet<quint16> KCharSelectData::getMatchingChars(const QString& s) 00751 { 00752 futureIndex.waitForFinished(); 00753 const Index index = futureIndex; 00754 Index::const_iterator pos = index.lowerBound(s); 00755 QSet<quint16> result; 00756 00757 while (pos != index.constEnd() && pos.key().startsWith(s)) { 00758 foreach (const quint16 &c, pos.value()) { 00759 result.insert(c); 00760 } 00761 ++pos; 00762 } 00763 00764 return result; 00765 } 00766 00767 QStringList KCharSelectData::splitString(const QString& s) 00768 { 00769 QStringList result; 00770 int start = 0; 00771 int end = 0; 00772 int length = s.length(); 00773 while (end < length) { 00774 while (end < length && (s[end].isLetterOrNumber() || s[end] == '+')) { 00775 end++; 00776 } 00777 if (start != end) { 00778 result.append(s.mid(start, end - start)); 00779 } 00780 start = end; 00781 while (end < length && !(s[end].isLetterOrNumber() || s[end] == '+')) { 00782 end++; 00783 start++; 00784 } 00785 } 00786 return result; 00787 } 00788 00789 void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString& s) 00790 { 00791 const QStringList strings = splitString(s); 00792 foreach(const QString &s, strings) { 00793 (*index)[s.toLower()].append(unicode); 00794 } 00795 } 00796 00797 Index KCharSelectData::createIndex(const QByteArray& dataFile) 00798 { 00799 Index i; 00800 00801 // character names 00802 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00803 const char* data = dataFile.constData(); 00804 const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4); 00805 const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8); 00806 00807 int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1; 00808 00809 for (int pos = 0; pos <= max; pos++) { 00810 const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6); 00811 quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2); 00812 appendToIndex(&i, unicode, QString(data + offset + 1)); 00813 } 00814 00815 // details 00816 const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12); 00817 const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16); 00818 00819 max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1; 00820 00821 for (int pos = 0; pos <= max; pos++) { 00822 const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27); 00823 00824 // aliases 00825 const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6); 00826 quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2); 00827 00828 for (int j = 0; j < aliasCount; j++) { 00829 appendToIndex(&i, unicode, QString::fromLatin1(data + aliasOffset)); 00830 aliasOffset += strlen(data + aliasOffset) + 1; 00831 } 00832 00833 // notes 00834 const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11); 00835 quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7); 00836 00837 for (int j = 0; j < notesCount; j++) { 00838 appendToIndex(&i, unicode, QString::fromLatin1(data + notesOffset)); 00839 notesOffset += strlen(data + notesOffset) + 1; 00840 } 00841 00842 // approximate equivalents 00843 const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16); 00844 quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12); 00845 00846 for (int j = 0; j < apprCount; j++) { 00847 appendToIndex(&i, unicode, QString::fromLatin1(data + apprOffset)); 00848 apprOffset += strlen(data + apprOffset) + 1; 00849 } 00850 00851 // equivalents 00852 const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21); 00853 quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17); 00854 00855 for (int j = 0; j < equivCount; j++) { 00856 appendToIndex(&i, unicode, QString::fromLatin1(data + equivOffset)); 00857 equivOffset += strlen(data + equivOffset) + 1; 00858 } 00859 00860 // see also - convert to string (hex) 00861 const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26); 00862 quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22); 00863 00864 for (int j = 0; j < seeAlsoCount; j++) { 00865 quint16 seeAlso = qFromLittleEndian<quint16> (udata + seeAlsoOffset); 00866 appendToIndex(&i, unicode, formatCode(seeAlso, 4, QString())); 00867 equivOffset += strlen(data + equivOffset) + 1; 00868 } 00869 } 00870 00871 // unihan data 00872 // temporary disabled due to the huge amount of data 00873 // const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36); 00874 // const quint32 unihanOffsetEnd = dataFile.size(); 00875 // max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1; 00876 // 00877 // for (int pos = 0; pos <= max; pos++) { 00878 // const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30); 00879 // for(int j = 0; j < 7; j++) { 00880 // quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4); 00881 // if(offset != 0) { 00882 // appendToIndex(&i, unicode, QString::fromUtf8(data + offset)); 00883 // } 00884 // } 00885 // } 00886 00887 return i; 00888 }
KDE 4.7 API Reference