KDEUI
kcharselectdata.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries 00002 00003 Copyright (C) 2007 Daniel Laidig <d.laidig@gmx.de> 00004 00005 This library is free software; you can redistribute it and/or 00006 modify it under the terms of the GNU Library General Public 00007 License as published by the Free Software Foundation; either 00008 version 2 of the License, or (at your option) any later version. 00009 00010 This library is distributed in the hope that it will be useful, 00011 but WITHOUT ANY WARRANTY; without even the implied warranty of 00012 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00013 Library General Public License for more details. 00014 00015 You should have received a copy of the GNU Library General Public License 00016 along with this library; see the file COPYING.LIB. If not, write to 00017 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00018 Boston, MA 02110-1301, USA. 00019 */ 00020 00021 #include "kcharselectdata_p.h" 00022 00023 #include <QStringList> 00024 #include <QFile> 00025 #include <qendian.h> 00026 #include <QtConcurrentRun> 00027 00028 #include <string.h> 00029 #include <klocalizedstring.h> 00030 #include <kstandarddirs.h> 00031 00032 /* constants for hangul (de)composition, see UAX #15 */ 00033 #define SBase 0xAC00 00034 #define LBase 0x1100 00035 #define VBase 0x1161 00036 #define TBase 0x11A7 00037 #define LCount 19 00038 #define VCount 21 00039 #define TCount 28 00040 #define NCount (VCount * TCount) 00041 #define SCount (LCount * NCount) 00042 00043 static const char JAMO_L_TABLE[][4] = 00044 { 00045 "G", "GG", "N", "D", "DD", "R", "M", "B", "BB", 00046 "S", "SS", "", "J", "JJ", "C", "K", "T", "P", "H" 00047 }; 00048 00049 static const char JAMO_V_TABLE[][4] = 00050 { 00051 "A", "AE", "YA", "YAE", "EO", "E", "YEO", "YE", "O", 00052 "WA", "WAE", "OE", "YO", "U", "WEO", "WE", "WI", 00053 "YU", "EU", "YI", "I" 00054 }; 00055 00056 static const char JAMO_T_TABLE[][4] = 00057 { 00058 "", "G", "GG", "GS", "N", "NJ", "NH", "D", "L", "LG", "LM", 00059 "LB", "LS", "LT", "LP", "LH", "M", "B", "BS", 00060 "S", "SS", "NG", "J", "C", "K", "T", "P", "H" 00061 }; 00062 00063 bool KCharSelectData::openDataFile() 00064 { 00065 if(!dataFile.isEmpty()) { 00066 return true; 00067 } else { 00068 QFile file(KStandardDirs::locate("data", "kcharselect/kcharselect-data")); 00069 if (!file.open(QIODevice::ReadOnly)) { 00070 return false; 00071 } 00072 dataFile = file.readAll(); 00073 file.close(); 00074 futureIndex = QtConcurrent::run(this, &KCharSelectData::createIndex, dataFile); 00075 return true; 00076 } 00077 } 00078 00079 quint32 KCharSelectData::getDetailIndex(const QChar& c) const 00080 { 00081 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00082 // Convert from little-endian, so that this code works on PPC too. 00083 // http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=482286 00084 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+12); 00085 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+16); 00086 00087 int min = 0; 00088 int mid; 00089 int max = ((offsetEnd - offsetBegin) / 27) - 1; 00090 00091 quint16 unicode = c.unicode(); 00092 00093 static quint16 most_recent_searched; 00094 static quint32 most_recent_result; 00095 00096 00097 if (unicode == most_recent_searched) 00098 return most_recent_result; 00099 00100 most_recent_searched = unicode; 00101 00102 while (max >= min) { 00103 mid = (min + max) / 2; 00104 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*27); 00105 if (unicode > midUnicode) 00106 min = mid + 1; 00107 else if (unicode < midUnicode) 00108 max = mid - 1; 00109 else { 00110 most_recent_result = offsetBegin + mid*27; 00111 00112 return most_recent_result; 00113 } 00114 } 00115 00116 most_recent_result = 0; 00117 return 0; 00118 } 00119 00120 QString KCharSelectData::formatCode(ushort code, int length, const QString& prefix, int base) 00121 { 00122 QString s = QString::number(code, base).toUpper(); 00123 while (s.size() < length) 00124 s.prepend('0'); 00125 s.prepend(prefix); 00126 return s; 00127 } 00128 00129 QList<QChar> KCharSelectData::blockContents(int block) 00130 { 00131 if(!openDataFile()) { 00132 return QList<QChar>(); 00133 } 00134 00135 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00136 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20); 00137 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24); 00138 00139 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00140 00141 QList<QChar> res; 00142 00143 if(block > max) 00144 return res; 00145 00146 quint16 unicodeBegin = qFromLittleEndian<quint16>(data + offsetBegin + block*4); 00147 quint16 unicodeEnd = qFromLittleEndian<quint16>(data + offsetBegin + block*4 + 2); 00148 00149 while(unicodeBegin < unicodeEnd) { 00150 res.append(unicodeBegin); 00151 unicodeBegin++; 00152 } 00153 res.append(unicodeBegin); // Be carefull when unicodeEnd==0xffff 00154 00155 return res; 00156 } 00157 00158 QList<int> KCharSelectData::sectionContents(int section) 00159 { 00160 if(!openDataFile()) { 00161 return QList<int>(); 00162 } 00163 00164 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00165 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28); 00166 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32); 00167 00168 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00169 00170 QList<int> res; 00171 00172 if(section > max) 00173 return res; 00174 00175 for(int i = 0; i <= max; i++) { 00176 const quint16 currSection = qFromLittleEndian<quint16>(data + offsetBegin + i*4); 00177 if(currSection == section) { 00178 res.append( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) ); 00179 } 00180 } 00181 00182 return res; 00183 } 00184 00185 QStringList KCharSelectData::sectionList() 00186 { 00187 if(!openDataFile()) { 00188 return QStringList(); 00189 } 00190 00191 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00192 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24); 00193 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28); 00194 00195 const char* data = dataFile.constData(); 00196 QStringList list; 00197 quint32 i = stringBegin; 00198 while(i < stringEnd) { 00199 list.append(i18nc("KCharSelect section name", data + i)); 00200 i += strlen(data + i) + 1; 00201 } 00202 00203 return list; 00204 } 00205 00206 QString KCharSelectData::block(const QChar& c) 00207 { 00208 return blockName(blockIndex(c)); 00209 } 00210 00211 QString KCharSelectData::section(const QChar& c) 00212 { 00213 return sectionName(sectionIndex(blockIndex(c))); 00214 } 00215 00216 QString KCharSelectData::name(const QChar& c) 00217 { 00218 if(!openDataFile()) { 00219 return QString(); 00220 } 00221 00222 ushort unicode = c.unicode(); 00223 if ((unicode >= 0x3400 && unicode <= 0x4DB5) 00224 || (unicode >= 0x4e00 && unicode <= 0x9fa5)) { 00225 // || (unicode >= 0x20000 && unicode <= 0x2A6D6) // useless, since limited to 16 bit 00226 return "CJK UNIFIED IDEOGRAPH-" + QString::number(unicode, 16); 00227 } else if (c >= 0xac00 && c <= 0xd7af) { 00228 /* compute hangul syllable name as per UAX #15 */ 00229 int SIndex = c.unicode() - SBase; 00230 int LIndex, VIndex, TIndex; 00231 00232 if (SIndex < 0 || SIndex >= SCount) 00233 return QString(); 00234 00235 LIndex = SIndex / NCount; 00236 VIndex = (SIndex % NCount) / TCount; 00237 TIndex = SIndex % TCount; 00238 00239 return QString("HANGUL SYLLABLE ") + JAMO_L_TABLE[LIndex] + JAMO_V_TABLE[VIndex] + JAMO_T_TABLE[TIndex]; 00240 } else if (unicode >= 0xD800 && unicode <= 0xDB7F) 00241 return i18n("<Non Private Use High Surrogate>"); 00242 else if (unicode >= 0xDB80 && unicode <= 0xDBFF) 00243 return i18n("<Private Use High Surrogate>"); 00244 else if (unicode >= 0xDC00 && unicode <= 0xDFFF) 00245 return i18n("<Low Surrogate>"); 00246 else if (unicode >= 0xE000 && unicode <= 0xF8FF) 00247 return i18n("<Private Use>"); 00248 // else if (unicode >= 0xF0000 && unicode <= 0xFFFFD) // 16 bit! 00249 // return i18n("<Plane 15 Private Use>"); 00250 // else if (unicode >= 0x100000 && unicode <= 0x10FFFD) 00251 // return i18n("<Plane 16 Private Use>"); 00252 else { 00253 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00254 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+4); 00255 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+8); 00256 00257 int min = 0; 00258 int mid; 00259 int max = ((offsetEnd - offsetBegin) / 6) - 1; 00260 QString s; 00261 00262 while (max >= min) { 00263 mid = (min + max) / 2; 00264 const quint16 midUnicode = qFromLittleEndian<quint16>(data + offsetBegin + mid*6); 00265 if (unicode > midUnicode) 00266 min = mid + 1; 00267 else if (unicode < midUnicode) 00268 max = mid - 1; 00269 else { 00270 quint32 offset = qFromLittleEndian<quint32>(data + offsetBegin + mid*6 + 2); 00271 s = QString(dataFile.constData() + offset); 00272 break; 00273 } 00274 } 00275 00276 if (s.isNull()) { 00277 return i18n("<not assigned>"); 00278 } else { 00279 return s; 00280 } 00281 } 00282 } 00283 00284 int KCharSelectData::blockIndex(const QChar& c) 00285 { 00286 if(!openDataFile()) { 00287 return 0; 00288 } 00289 00290 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00291 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+20); 00292 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+24); 00293 const quint16 unicode = c.unicode(); 00294 00295 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00296 00297 int i = 0; 00298 00299 while (unicode > qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) && i < max) { 00300 i++; 00301 } 00302 00303 return i; 00304 } 00305 00306 int KCharSelectData::sectionIndex(int block) 00307 { 00308 if(!openDataFile()) { 00309 return 0; 00310 } 00311 00312 const uchar* data = reinterpret_cast<const uchar*>(dataFile.constData()); 00313 const quint32 offsetBegin = qFromLittleEndian<quint32>(data+28); 00314 const quint32 offsetEnd = qFromLittleEndian<quint32>(data+32); 00315 00316 int max = ((offsetEnd - offsetBegin) / 4) - 1; 00317 00318 for(int i = 0; i <= max; i++) { 00319 if( qFromLittleEndian<quint16>(data + offsetBegin + i*4 + 2) == block) { 00320 return qFromLittleEndian<quint16>(data + offsetBegin + i*4); 00321 } 00322 } 00323 00324 return 0; 00325 } 00326 00327 QString KCharSelectData::blockName(int index) 00328 { 00329 if(!openDataFile()) { 00330 return QString(); 00331 } 00332 00333 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00334 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+16); 00335 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+20); 00336 00337 quint32 i = stringBegin; 00338 int currIndex = 0; 00339 00340 const char* data = dataFile.constData(); 00341 while(i < stringEnd && currIndex < index) { 00342 i += strlen(data + i) + 1; 00343 currIndex++; 00344 } 00345 00346 return i18nc("KCharselect unicode block name", data + i); 00347 } 00348 00349 QString KCharSelectData::sectionName(int index) 00350 { 00351 if(!openDataFile()) { 00352 return QString(); 00353 } 00354 00355 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00356 const quint32 stringBegin = qFromLittleEndian<quint32>(udata+24); 00357 const quint32 stringEnd = qFromLittleEndian<quint32>(udata+28); 00358 00359 quint32 i = stringBegin; 00360 int currIndex = 0; 00361 00362 const char* data = dataFile.constData(); 00363 while(i < stringEnd && currIndex < index) { 00364 i += strlen(data + i) + 1; 00365 currIndex++; 00366 } 00367 00368 return i18nc("KCharselect unicode section name", data + i); 00369 } 00370 00371 QStringList KCharSelectData::aliases(const QChar& c) 00372 { 00373 if(!openDataFile()) { 00374 return QStringList(); 00375 } 00376 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00377 const int detailIndex = getDetailIndex(c); 00378 if(detailIndex == 0) { 00379 return QStringList(); 00380 } 00381 00382 const quint8 count = * (quint8 *)(udata + detailIndex + 6); 00383 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 2); 00384 00385 QStringList aliases; 00386 00387 const char* data = dataFile.constData(); 00388 for (int i = 0; i < count; i++) { 00389 aliases.append(QString::fromUtf8(data + offset)); 00390 offset += strlen(data + offset) + 1; 00391 } 00392 return aliases; 00393 } 00394 00395 QStringList KCharSelectData::notes(const QChar& c) 00396 { 00397 if(!openDataFile()) { 00398 return QStringList(); 00399 } 00400 const int detailIndex = getDetailIndex(c); 00401 if(detailIndex == 0) { 00402 return QStringList(); 00403 } 00404 00405 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00406 const quint8 count = * (quint8 *)(udata + detailIndex + 11); 00407 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 7); 00408 00409 QStringList notes; 00410 00411 const char* data = dataFile.constData(); 00412 for (int i = 0; i < count; i++) { 00413 notes.append(QString::fromLatin1(data + offset)); 00414 offset += strlen(data + offset) + 1; 00415 } 00416 00417 return notes; 00418 } 00419 00420 QList<QChar> KCharSelectData::seeAlso(const QChar& c) 00421 { 00422 if(!openDataFile()) { 00423 return QList<QChar>(); 00424 } 00425 const int detailIndex = getDetailIndex(c); 00426 if(detailIndex == 0) { 00427 return QList<QChar>(); 00428 } 00429 00430 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00431 const quint8 count = * (quint8 *)(udata + detailIndex + 26); 00432 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 22); 00433 00434 QList<QChar> seeAlso; 00435 00436 for (int i = 0; i < count; i++) { 00437 seeAlso.append(qFromLittleEndian<quint16> (udata + offset)); 00438 offset += 2; 00439 } 00440 00441 return seeAlso; 00442 } 00443 00444 QStringList KCharSelectData::equivalents(const QChar& c) 00445 { 00446 if(!openDataFile()) { 00447 return QStringList(); 00448 } 00449 const int detailIndex = getDetailIndex(c); 00450 if(detailIndex == 0) { 00451 return QStringList(); 00452 } 00453 00454 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00455 const quint8 count = * (quint8 *)(udata + detailIndex + 21); 00456 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 17); 00457 00458 QStringList equivalents; 00459 00460 const char* data = dataFile.constData(); 00461 for (int i = 0; i < count; i++) { 00462 equivalents.append(QString::fromUtf8(data + offset)); 00463 offset += strlen(data + offset) + 1; 00464 } 00465 00466 return equivalents; 00467 } 00468 00469 QStringList KCharSelectData::approximateEquivalents(const QChar& c) 00470 { 00471 if(!openDataFile()) { 00472 return QStringList(); 00473 } 00474 const int detailIndex = getDetailIndex(c); 00475 if(detailIndex == 0) { 00476 return QStringList(); 00477 } 00478 00479 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00480 const quint8 count = * (quint8 *)(udata + detailIndex + 16); 00481 quint32 offset = qFromLittleEndian<quint32>(udata + detailIndex + 12); 00482 00483 QStringList approxEquivalents; 00484 00485 const char* data = dataFile.constData(); 00486 for (int i = 0; i < count; i++) { 00487 approxEquivalents.append(QString::fromUtf8(data + offset)); 00488 offset += strlen(data + offset) + 1; 00489 } 00490 00491 return approxEquivalents; 00492 } 00493 00494 QStringList KCharSelectData::unihanInfo(const QChar& c) 00495 { 00496 if(!openDataFile()) { 00497 return QStringList(); 00498 } 00499 00500 const char* data = dataFile.constData(); 00501 const uchar* udata = reinterpret_cast<const uchar*>(data); 00502 const quint32 offsetBegin = qFromLittleEndian<quint32>(udata+36); 00503 const quint32 offsetEnd = dataFile.size(); 00504 00505 int min = 0; 00506 int mid; 00507 int max = ((offsetEnd - offsetBegin) / 30) - 1; 00508 quint16 unicode = c.unicode(); 00509 00510 while (max >= min) { 00511 mid = (min + max) / 2; 00512 const quint16 midUnicode = qFromLittleEndian<quint16>(udata + offsetBegin + mid*30); 00513 if (unicode > midUnicode) 00514 min = mid + 1; 00515 else if (unicode < midUnicode) 00516 max = mid - 1; 00517 else { 00518 QStringList res; 00519 for(int i = 0; i < 7; i++) { 00520 quint32 offset = qFromLittleEndian<quint32>(udata + offsetBegin + mid*30 + 2 + i*4); 00521 if(offset != 0) { 00522 res.append(QString::fromUtf8(data + offset)); 00523 } else { 00524 res.append(QString()); 00525 } 00526 } 00527 return res; 00528 } 00529 } 00530 00531 return QStringList(); 00532 } 00533 00534 bool KCharSelectData::isDisplayable(const QChar& c) 00535 { 00536 // Qt internally uses U+FDD0 and U+FDD1 to mark the beginning and the end of frames. 00537 // They should be seen as non-printable characters, as trying to display them leads 00538 // to a crash caused by a Qt "noBlockInString" assertion. 00539 if(c == 0xFDD0 || c == 0xFDD1) 00540 return false; 00541 00542 return c.isPrint() && !isIgnorable(c); 00543 } 00544 00545 bool KCharSelectData::isIgnorable(const QChar& c) 00546 { 00547 /* 00548 * According to the Unicode standard, Default Ignorable Code Points 00549 * should be ignored unless explicitly supported. For example, U+202E 00550 * RIGHT-TO-LEFT-OVERRIDE ir printable according to Qt, but displaying 00551 * it gives the undesired effect of all text being turned RTL. We do not 00552 * have a way to "explicitly" support it, so we will treat it as 00553 * non-printable. 00554 * 00555 * There is a list of these on 00556 * http://unicode.org/Public/UNIDATA/DerivedCoreProperties.txt under the 00557 * property Default_Ignorable_Code_Point. 00558 */ 00559 00560 //NOTE: not very nice to hardcode these here; is it worth it to modify 00561 // the binary data file to hold them? 00562 return c == 0x00AD || c == 0x034F || c == 0x115F || c == 0x1160 || 00563 c == 0x17B4 || c == 0x17B5 || (c >= 0x180B && c <= 0x180D) || 00564 (c >= 0x200B && c <= 0x200F) || (c >= 0x202A && c <= 0x202E) || 00565 (c >= 0x2060 && c <= 0x206F) || c == 0x3164 || 00566 (c >= 0xFE00 && c <= 0xFE0F) || c == 0xFEFF || c == 0xFFA0 || 00567 (c >= 0xFFF0 && c <= 0xFFF8); 00568 } 00569 00570 bool KCharSelectData::isCombining(const QChar &c) 00571 { 00572 return section(c) == i18nc("KCharSelect section name", "Combining Diacritical Marks"); 00573 //FIXME: this is an imperfect test. There are many combining characters 00574 // that are outside of this section. See Grapheme_Extend in 00575 // http://www.unicode.org/Public/UNIDATA/DerivedCoreProperties.txt 00576 } 00577 00578 QString KCharSelectData::display(const QChar &c, const QFont &font) 00579 { 00580 if (!isDisplayable(c)) { 00581 return QString("<b>") + i18n("Non-printable") + "</b>"; 00582 } else { 00583 QString s = QString("<font size=\"+4\" face=\"") + font.family() + "\">"; 00584 if (isCombining(c)) { 00585 s += displayCombining(c); 00586 } else { 00587 s += "&#" + QString::number(c.unicode()) + ';'; 00588 } 00589 s += "</font>"; 00590 return s; 00591 } 00592 } 00593 00594 QString KCharSelectData::displayCombining(const QChar &c) 00595 { 00596 /* 00597 * The purpose of this is to make it easier to see how a combining 00598 * character affects the text around it. 00599 * The initial plan was to use U+25CC DOTTED CIRCLE for this purpose, 00600 * as seen in pdfs from Unicode, but there seem to be a lot of alignment 00601 * problems with that. 00602 * 00603 * Eventually, it would be nice to determine whether the character 00604 * combines to the left or to the right, etc. 00605 */ 00606 QString s = " &#" + QString::number(c.unicode()) + "; " + 00607 " (ab&#" + QString::number(c.unicode()) + ";c)"; 00608 return s; 00609 } 00610 00611 QString KCharSelectData::categoryText(QChar::Category category) 00612 { 00613 switch (category) { 00614 case QChar::Other_Control: return i18n("Other, Control"); 00615 case QChar::Other_Format: return i18n("Other, Format"); 00616 case QChar::Other_NotAssigned: return i18n("Other, Not Assigned"); 00617 case QChar::Other_PrivateUse: return i18n("Other, Private Use"); 00618 case QChar::Other_Surrogate: return i18n("Other, Surrogate"); 00619 case QChar::Letter_Lowercase: return i18n("Letter, Lowercase"); 00620 case QChar::Letter_Modifier: return i18n("Letter, Modifier"); 00621 case QChar::Letter_Other: return i18n("Letter, Other"); 00622 case QChar::Letter_Titlecase: return i18n("Letter, Titlecase"); 00623 case QChar::Letter_Uppercase: return i18n("Letter, Uppercase"); 00624 case QChar::Mark_SpacingCombining: return i18n("Mark, Spacing Combining"); 00625 case QChar::Mark_Enclosing: return i18n("Mark, Enclosing"); 00626 case QChar::Mark_NonSpacing: return i18n("Mark, Non-Spacing"); 00627 case QChar::Number_DecimalDigit: return i18n("Number, Decimal Digit"); 00628 case QChar::Number_Letter: return i18n("Number, Letter"); 00629 case QChar::Number_Other: return i18n("Number, Other"); 00630 case QChar::Punctuation_Connector: return i18n("Punctuation, Connector"); 00631 case QChar::Punctuation_Dash: return i18n("Punctuation, Dash"); 00632 case QChar::Punctuation_Close: return i18n("Punctuation, Close"); 00633 case QChar::Punctuation_FinalQuote: return i18n("Punctuation, Final Quote"); 00634 case QChar::Punctuation_InitialQuote: return i18n("Punctuation, Initial Quote"); 00635 case QChar::Punctuation_Other: return i18n("Punctuation, Other"); 00636 case QChar::Punctuation_Open: return i18n("Punctuation, Open"); 00637 case QChar::Symbol_Currency: return i18n("Symbol, Currency"); 00638 case QChar::Symbol_Modifier: return i18n("Symbol, Modifier"); 00639 case QChar::Symbol_Math: return i18n("Symbol, Math"); 00640 case QChar::Symbol_Other: return i18n("Symbol, Other"); 00641 case QChar::Separator_Line: return i18n("Separator, Line"); 00642 case QChar::Separator_Paragraph: return i18n("Separator, Paragraph"); 00643 case QChar::Separator_Space: return i18n("Separator, Space"); 00644 default: return i18n("Unknown"); 00645 } 00646 } 00647 00648 QList<QChar> KCharSelectData::find(const QString& needle) 00649 { 00650 QSet<quint16> result; 00651 00652 QList<QChar> returnRes; 00653 QString simplified = needle.simplified(); 00654 QStringList searchStrings = splitString(needle.simplified()); 00655 00656 if(simplified.length() == 1) { 00657 // search for hex representation of the character 00658 searchStrings = QStringList(formatCode(simplified.at(0).unicode())); 00659 } 00660 00661 if (searchStrings.count() == 0) { 00662 return returnRes; 00663 } 00664 00665 QRegExp regExp("^(|u\\+|U\\+|0x|0X)([A-Fa-f0-9]{4})$"); 00666 foreach(const QString &s, searchStrings) { 00667 if(regExp.exactMatch(s)) { 00668 returnRes.append(regExp.cap(2).toInt(0, 16)); 00669 // search for "1234" instead of "0x1234" 00670 if (s.length() == 6) { 00671 searchStrings[searchStrings.indexOf(s)] = regExp.cap(2); 00672 } 00673 } 00674 // try to parse string as decimal number 00675 bool ok; 00676 int unicode = s.toInt(&ok); 00677 if (ok && unicode >= 0 && unicode <= 0xFFFF) { 00678 returnRes.append(unicode); 00679 } 00680 } 00681 00682 bool firstSubString = true; 00683 foreach(const QString &s, searchStrings) { 00684 QSet<quint16> partResult = getMatchingChars(s.toLower()); 00685 if (firstSubString) { 00686 result = partResult; 00687 firstSubString = false; 00688 } else { 00689 result = result.intersect(partResult); 00690 } 00691 } 00692 00693 // remove results found by matching the code point to prevent duplicate results 00694 // while letting these characters stay at the beginning 00695 foreach(const QChar &c, returnRes) { 00696 result.remove(c.unicode()); 00697 } 00698 00699 QList<quint16> sortedResult = result.toList(); 00700 qSort(sortedResult); 00701 00702 foreach(const quint16 &c, sortedResult) { 00703 returnRes.append(c); 00704 } 00705 00706 return returnRes; 00707 } 00708 00709 QSet<quint16> KCharSelectData::getMatchingChars(const QString& s) 00710 { 00711 futureIndex.waitForFinished(); 00712 const Index index = futureIndex; 00713 Index::const_iterator pos = index.lowerBound(s); 00714 QSet<quint16> result; 00715 00716 while (pos != index.constEnd() && pos.key().startsWith(s)) { 00717 foreach (const quint16 &c, pos.value()) { 00718 result.insert(c); 00719 } 00720 ++pos; 00721 } 00722 00723 return result; 00724 } 00725 00726 QStringList KCharSelectData::splitString(const QString& s) 00727 { 00728 QStringList result; 00729 int start = 0; 00730 int end = 0; 00731 int length = s.length(); 00732 while (end < length) { 00733 while (end < length && (s[end].isLetterOrNumber() || s[end] == '+')) { 00734 end++; 00735 } 00736 if (start != end) { 00737 result.append(s.mid(start, end - start)); 00738 } 00739 start = end; 00740 while (end < length && !(s[end].isLetterOrNumber() || s[end] == '+')) { 00741 end++; 00742 start++; 00743 } 00744 } 00745 return result; 00746 } 00747 00748 void KCharSelectData::appendToIndex(Index *index, quint16 unicode, const QString& s) 00749 { 00750 const QStringList strings = splitString(s); 00751 foreach(const QString &s, strings) { 00752 (*index)[s.toLower()].append(unicode); 00753 } 00754 } 00755 00756 Index KCharSelectData::createIndex(const QByteArray& dataFile) 00757 { 00758 Index i; 00759 00760 // character names 00761 const uchar* udata = reinterpret_cast<const uchar*>(dataFile.constData()); 00762 const char* data = dataFile.constData(); 00763 const quint32 nameOffsetBegin = qFromLittleEndian<quint32>(udata+4); 00764 const quint32 nameOffsetEnd = qFromLittleEndian<quint32>(udata+8); 00765 00766 int max = ((nameOffsetEnd - nameOffsetBegin) / 6) - 1; 00767 00768 for (int pos = 0; pos <= max; pos++) { 00769 const quint16 unicode = qFromLittleEndian<quint16>(udata + nameOffsetBegin + pos*6); 00770 quint32 offset = qFromLittleEndian<quint32>(udata + nameOffsetBegin + pos*6 + 2); 00771 appendToIndex(&i, unicode, QString(data + offset)); 00772 } 00773 00774 // details 00775 const quint32 detailsOffsetBegin = qFromLittleEndian<quint32>(udata+12); 00776 const quint32 detailsOffsetEnd = qFromLittleEndian<quint32>(udata+16); 00777 00778 max = ((detailsOffsetEnd - detailsOffsetBegin) / 27) - 1; 00779 00780 for (int pos = 0; pos <= max; pos++) { 00781 const quint16 unicode = qFromLittleEndian<quint16>(udata + detailsOffsetBegin + pos*27); 00782 00783 // aliases 00784 const quint8 aliasCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 6); 00785 quint32 aliasOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 2); 00786 00787 for (int j = 0; j < aliasCount; j++) { 00788 appendToIndex(&i, unicode, QString::fromUtf8(data + aliasOffset)); 00789 aliasOffset += strlen(data + aliasOffset) + 1; 00790 } 00791 00792 // notes 00793 const quint8 notesCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 11); 00794 quint32 notesOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 7); 00795 00796 for (int j = 0; j < notesCount; j++) { 00797 appendToIndex(&i, unicode, QString::fromUtf8(data + notesOffset)); 00798 notesOffset += strlen(data + notesOffset) + 1; 00799 } 00800 00801 // approximate equivalents 00802 const quint8 apprCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 16); 00803 quint32 apprOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 12); 00804 00805 for (int j = 0; j < apprCount; j++) { 00806 appendToIndex(&i, unicode, QString::fromUtf8(data + apprOffset)); 00807 apprOffset += strlen(data + apprOffset) + 1; 00808 } 00809 00810 // equivalents 00811 const quint8 equivCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 21); 00812 quint32 equivOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 17); 00813 00814 for (int j = 0; j < equivCount; j++) { 00815 appendToIndex(&i, unicode, QString::fromUtf8(data + equivOffset)); 00816 equivOffset += strlen(data + equivOffset) + 1; 00817 } 00818 00819 // see also - convert to string (hex) 00820 const quint8 seeAlsoCount = * (quint8 *)(udata + detailsOffsetBegin + pos*27 + 26); 00821 quint32 seeAlsoOffset = qFromLittleEndian<quint32>(udata + detailsOffsetBegin + pos*27 + 22); 00822 00823 for (int j = 0; j < seeAlsoCount; j++) { 00824 quint16 seeAlso = qFromLittleEndian<quint16> (udata + seeAlsoOffset); 00825 appendToIndex(&i, unicode, formatCode(seeAlso, 4, QString())); 00826 equivOffset += strlen(data + equivOffset) + 1; 00827 } 00828 } 00829 00830 // unihan data 00831 // temporary disabled due to the huge amount of data 00832 // const quint32 unihanOffsetBegin = qFromLittleEndian<quint32>(udata+36); 00833 // const quint32 unihanOffsetEnd = dataFile.size(); 00834 // max = ((unihanOffsetEnd - unihanOffsetBegin) / 30) - 1; 00835 // 00836 // for (int pos = 0; pos <= max; pos++) { 00837 // const quint16 unicode = qFromLittleEndian<quint16>(udata + unihanOffsetBegin + pos*30); 00838 // for(int j = 0; j < 7; j++) { 00839 // quint32 offset = qFromLittleEndian<quint32>(udata + unihanOffsetBegin + pos*30 + 2 + j*4); 00840 // if(offset != 0) { 00841 // appendToIndex(&i, unicode, QString::fromUtf8(data + offset)); 00842 // } 00843 // } 00844 // } 00845 00846 return i; 00847 }
KDE 4.6 API Reference