KDECore
ksycocadict.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries 00002 * Copyright (C) 1999 Waldo Bastian <bastian@kde.org> 00003 * 00004 * This library is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU Library General Public 00006 * License version 2 as published by the Free Software Foundation; 00007 * 00008 * This library is distributed in the hope that it will be useful, 00009 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00011 * Library General Public License for more details. 00012 * 00013 * You should have received a copy of the GNU Library General Public License 00014 * along with this library; see the file COPYING.LIB. If not, write to 00015 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00016 * Boston, MA 02110-1301, USA. 00017 **/ 00018 00019 #include "ksycocadict_p.h" 00020 #include <kservice.h> 00021 #include "ksycocaentry.h" 00022 #include "ksycoca.h" 00023 #include "kdebug.h" 00024 00025 #include <QtCore/QBitArray> 00026 00027 namespace 00028 { 00029 struct string_entry { 00030 string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload) 00031 : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload) 00032 {} 00033 uint hash; 00034 const int length; 00035 const QString keyStr; 00036 const QChar * const key; // always points to keyStr.unicode(); just an optimization 00037 const KSycocaEntry::Ptr payload; 00038 }; 00039 } 00040 00041 class KSycocaDictStringList : public QList<string_entry*> 00042 { 00043 public: 00044 ~KSycocaDictStringList() { 00045 qDeleteAll(*this); 00046 } 00047 }; 00048 00049 class KSycocaDict::Private 00050 { 00051 public: 00052 Private() 00053 : stringlist( 0 ), 00054 stream( 0 ), 00055 offset( 0 ) 00056 { 00057 } 00058 00059 ~Private() 00060 { 00061 delete stringlist; 00062 } 00063 00064 // Helper for find_string and findMultiString 00065 qint32 offsetForKey(const QString& key) const; 00066 00067 // Calculate hash - can be used during loading and during saving. 00068 quint32 hashKey(const QString & key) const; 00069 00070 KSycocaDictStringList *stringlist; 00071 QDataStream *stream; 00072 qint64 offset; 00073 quint32 hashTableSize; 00074 QList<qint32> hashList; 00075 }; 00076 00077 KSycocaDict::KSycocaDict() 00078 : d( new Private ) 00079 { 00080 } 00081 00082 KSycocaDict::KSycocaDict(QDataStream *str, int offset) 00083 : d( new Private ) 00084 { 00085 d->stream = str; 00086 d->offset = offset; 00087 00088 quint32 test1, test2; 00089 str->device()->seek(offset); 00090 (*str) >> test1 >> test2; 00091 if ((test1 > 0x000fffff) || (test2 > 1024)) 00092 { 00093 KSycoca::flagError(); 00094 d->hashTableSize = 0; 00095 d->offset = 0; 00096 return; 00097 } 00098 00099 str->device()->seek(offset); 00100 (*str) >> d->hashTableSize; 00101 (*str) >> d->hashList; 00102 d->offset = str->device()->pos(); // Start of hashtable 00103 } 00104 00105 KSycocaDict::~KSycocaDict() 00106 { 00107 delete d; 00108 } 00109 00110 void 00111 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload) 00112 { 00113 if (key.isEmpty()) return; // Not allowed (should never happen) 00114 if (!payload) return; // Not allowed! 00115 if (!d->stringlist) 00116 { 00117 d->stringlist = new KSycocaDictStringList; 00118 } 00119 00120 string_entry *entry = new string_entry(key, payload); 00121 d->stringlist->append(entry); 00122 } 00123 00124 void 00125 KSycocaDict::remove(const QString &key) 00126 { 00127 if (!d || !d->stringlist) { 00128 return; 00129 } 00130 00131 bool found = false; 00132 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) { 00133 string_entry* entry = *it; 00134 if (entry->keyStr == key) { 00135 d->stringlist->erase(it); 00136 delete entry; 00137 found = true; 00138 break; 00139 } 00140 } 00141 if (!found) { 00142 kWarning(7011) << "key not found:" << key; 00143 } 00144 } 00145 00146 int KSycocaDict::find_string(const QString &key ) const 00147 { 00148 Q_ASSERT(d); 00149 00150 //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key); 00151 qint32 offset = d->offsetForKey(key); 00152 00153 //kDebug(7011) << QString("offset is %1").arg(offset,8,16); 00154 if (offset == 0) 00155 return 0; 00156 00157 if (offset > 0) 00158 return offset; // Positive ID 00159 00160 // Lookup duplicate list. 00161 offset = -offset; 00162 00163 d->stream->device()->seek(offset); 00164 //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16); 00165 00166 while(true) 00167 { 00168 (*d->stream) >> offset; 00169 if (offset == 0) break; 00170 QString dupkey; 00171 (*d->stream) >> dupkey; 00172 //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey); 00173 if (dupkey == key) return offset; 00174 } 00175 //kWarning(7011) << "Not found!"; 00176 00177 return 0; 00178 } 00179 00180 00181 QList<int> KSycocaDict::findMultiString(const QString &key ) const 00182 { 00183 qint32 offset = d->offsetForKey(key); 00184 QList<int> offsetList; 00185 if (offset == 0) 00186 return offsetList; 00187 00188 if (offset > 0) { // Positive ID: one entry found 00189 offsetList.append(offset); 00190 return offsetList; 00191 } 00192 00193 // Lookup duplicate list. 00194 offset = -offset; 00195 00196 d->stream->device()->seek(offset); 00197 //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16); 00198 00199 while(true) 00200 { 00201 (*d->stream) >> offset; 00202 if (offset == 0) break; 00203 QString dupkey; 00204 (*d->stream) >> dupkey; 00205 //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey); 00206 if (dupkey == key) 00207 offsetList.append(offset); 00208 } 00209 return offsetList; 00210 } 00211 00212 uint KSycocaDict::count() const 00213 { 00214 if ( !d || !d->stringlist ) return 0; 00215 00216 return d->stringlist->count(); 00217 } 00218 00219 void 00220 KSycocaDict::clear() 00221 { 00222 delete d; 00223 d = 0; 00224 } 00225 00226 uint KSycocaDict::Private::hashKey( const QString &key) const 00227 { 00228 int l = key.length(); 00229 register uint h = 0; 00230 00231 for(int i = 0; i < hashList.count(); i++) 00232 { 00233 int pos = hashList[i]; 00234 if (pos < 0) { 00235 pos = -pos-1; 00236 if (pos < l) 00237 h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff; 00238 } else { 00239 pos = pos-1; 00240 if (pos < l) 00241 h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff; 00242 } 00243 } 00244 return h; 00245 } 00246 00247 // If we have the strings 00248 // hello 00249 // world 00250 // kde 00251 // Then we end up with 00252 // ABCDE 00253 // where A = diversity of 'h' + 'w' + 'k' etc. 00254 // Also, diversity(-2) == 'l'+'l'+'d' (second character from the end) 00255 00256 // The hasList is used for hashing: 00257 // hashList = (-2, 1, 3) means that the hash key comes from 00258 // the 2nd character from the right, then the 1st from the left, then the 3rd from the left. 00259 00260 // Calculate the diversity of the strings at position 'pos' 00261 // NOTE: this code is slow, it takes 30% of the _overall_ `kbuildsycoca4 --noincremental` running time 00262 static int 00263 calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz) 00264 { 00265 if (inPos == 0) return 0; 00266 QBitArray matrix(sz); 00267 int pos; 00268 00269 //static const int s_maxItems = 50; 00270 //int numItem = 0; 00271 00272 if (inPos < 0) { 00273 pos = -inPos-1; 00274 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it) 00275 { 00276 string_entry* entry = *it; 00277 register int l = entry->length; 00278 if (pos < l && pos != 0) { 00279 register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff; 00280 matrix.setBit( hash % sz, true ); 00281 } 00282 //if (++numItem == s_maxItems) 00283 // break; 00284 } 00285 } else { 00286 pos = inPos-1; 00287 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it) 00288 { 00289 string_entry* entry = *it; 00290 if (pos < entry->length) { 00291 register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff; 00292 matrix.setBit( hash % sz, true ); 00293 } 00294 //if (++numItem == s_maxItems) 00295 // break; 00296 } 00297 } 00298 int diversity = 0; 00299 for (uint i=0;i< sz;++i) 00300 if (matrix.testBit(i)) 00301 ++diversity; 00302 00303 //kDebug() << "inPos=" << inPos << "pos=" << pos << "diversity=" << diversity; 00304 00305 return diversity; 00306 } 00307 00308 // 00309 // Add the diversity of the strings at position 'pos' 00310 static void 00311 addDiversity(KSycocaDictStringList *stringlist, int pos) 00312 { 00313 if (pos == 0) return; 00314 if (pos < 0) { 00315 pos = -pos-1; 00316 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it) 00317 { 00318 string_entry* entry = *it; 00319 register int l = entry->length; 00320 if (pos < l) 00321 entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff; 00322 } 00323 } else { 00324 pos = pos - 1; 00325 for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it) 00326 { 00327 string_entry* entry = *it; 00328 if (pos < entry->length) 00329 entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff; 00330 } 00331 } 00332 } 00333 00334 00335 void 00336 KSycocaDict::save(QDataStream &str) 00337 { 00338 if (count() == 0) 00339 { 00340 d->hashTableSize = 0; 00341 d->hashList.clear(); 00342 str << d->hashTableSize; 00343 str << d->hashList; 00344 return; 00345 } 00346 00347 d->offset = str.device()->pos(); 00348 00349 //kDebug(7011) << "KSycocaDict:" << count() << "entries."; 00350 00351 //kDebug(7011) << "Calculating hash keys.."; 00352 00353 int maxLength = 0; 00354 //kDebug(7011) << "Finding maximum string length"; 00355 for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it) 00356 { 00357 string_entry* entry = *it; 00358 entry->hash = 0; 00359 if (entry->length > maxLength) 00360 maxLength = entry->length; 00361 } 00362 00363 //kDebug(7011) << "Max string length=" << maxLength << "existing hashList=" << d->hashList; 00364 00365 // use "almost prime" number for sz (to calculate diversity) and later 00366 // for the table size of big tables 00367 // int sz = d->stringlist->count()*5-1; 00368 register unsigned int sz = count()*4 + 1; 00369 while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13)))) 00370 sz+=2; 00371 00372 d->hashList.clear(); 00373 00374 // Times (with warm caches, i.e. after multiple runs) 00375 // kbuildsycoca4 --noincremental 2.83s user 0.20s system 95% cpu 3.187 total 00376 // kbuildsycoca4 --noincremental 2.74s user 0.25s system 93% cpu 3.205 total 00377 // unittest: 0.50-60 msec per iteration / 0.40-50 msec per iteration 00378 00379 // Now that MimeTypes are not parsed anymore: 00380 // kbuildsycoca4 --noincremental 2.18s user 0.30s system 91% cpu 2.719 total 00381 // kbuildsycoca4 --noincremental 2.07s user 0.34s system 89% cpu 2.681 total 00382 00383 // If I enabled s_maxItems = 50, it goes down to 00384 // but I don't know if that's a good idea. 00385 // kbuildsycoca4 --noincremental 1.73s user 0.31s system 85% cpu 2.397 total 00386 // kbuildsycoca4 --noincremental 1.84s user 0.29s system 95% cpu 2.230 total 00387 00388 // try to limit diversity scan by "predicting" positions 00389 // with high diversity 00390 QVector<int> oldvec(maxLength*2+1); 00391 oldvec.fill(0); 00392 int mindiv=0; 00393 int lastDiv = 0; 00394 00395 while(true) 00396 { 00397 int divsum=0,divnum=0; 00398 00399 int maxDiv = 0; 00400 int maxPos = 0; 00401 for (int pos = -maxLength; pos <= maxLength; ++pos) { 00402 // cut off 00403 if (oldvec[pos+maxLength] < mindiv) { oldvec[pos+maxLength]=0; continue; } 00404 00405 const int diversity = calcDiversity(d->stringlist, pos, sz); 00406 if (diversity > maxDiv) { 00407 maxDiv = diversity; 00408 maxPos = pos; 00409 } 00410 oldvec[pos + maxLength] = diversity; 00411 divsum += diversity; 00412 ++divnum; 00413 } 00414 // arbitrary cut-off value 3/4 of average seems to work 00415 if (divnum) 00416 mindiv=(3*divsum)/(4*divnum); 00417 00418 if (maxDiv <= lastDiv) 00419 break; 00420 //kDebug() << "Max Div=" << maxDiv << "at pos" << maxPos; 00421 lastDiv = maxDiv; 00422 addDiversity(d->stringlist, maxPos); 00423 d->hashList.append(maxPos); 00424 } 00425 00426 00427 for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) { 00428 (*it)->hash = d->hashKey((*it)->keyStr); 00429 } 00430 // fprintf(stderr, "Calculating minimum table size..\n"); 00431 00432 d->hashTableSize = sz; 00433 00434 //kDebug() << "hashTableSize=" << sz << "hashList=" << d->hashList << "oldvec=" << oldvec; 00435 00436 struct hashtable_entry { 00437 string_entry *entry; 00438 QList<string_entry*>* duplicates; 00439 qint64 duplicate_offset; 00440 }; 00441 00442 hashtable_entry *hashTable = new hashtable_entry[ sz ]; 00443 00444 //kDebug(7011) << "Clearing hashtable..."; 00445 for (unsigned int i=0; i < sz; i++) 00446 { 00447 hashTable[i].entry = 0; 00448 hashTable[i].duplicates = 0; 00449 } 00450 00451 //kDebug(7011) << "Filling hashtable..."; 00452 for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it) 00453 { 00454 string_entry* entry = *it; 00455 //kDebug(7011) << "entry keyStr=" << entry->keyStr << entry->payload.data() << entry->payload->entryPath(); 00456 int hash = entry->hash % sz; 00457 if (!hashTable[hash].entry) 00458 { // First entry 00459 hashTable[hash].entry = entry; 00460 } 00461 else 00462 { 00463 if (!hashTable[hash].duplicates) 00464 { // Second entry, build duplicate list. 00465 hashTable[hash].duplicates = new QList<string_entry*>; 00466 hashTable[hash].duplicates->append(hashTable[hash].entry); 00467 hashTable[hash].duplicate_offset = 0; 00468 } 00469 hashTable[hash].duplicates->append(entry); 00470 } 00471 } 00472 00473 str << d->hashTableSize; 00474 str << d->hashList; 00475 00476 d->offset = str.device()->pos(); // d->offset points to start of hashTable 00477 //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16); 00478 00479 // Write the hashtable + the duplicates twice. 00480 // The duplicates are after the normal hashtable, but the offset of each 00481 // duplicate entry is written into the normal hashtable. 00482 for(int pass = 1; pass <= 2; pass++) 00483 { 00484 str.device()->seek(d->offset); 00485 //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass); 00486 for(uint i=0; i < d->hashTableSize; i++) 00487 { 00488 qint32 tmpid; 00489 if (!hashTable[i].entry) 00490 tmpid = (qint32) 0; 00491 else if (!hashTable[i].duplicates) 00492 tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID 00493 else 00494 tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID 00495 str << tmpid; 00496 //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16); 00497 } 00498 //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16); 00499 00500 //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass); 00501 for(uint i=0; i < d->hashTableSize; i++) 00502 { 00503 const QList<string_entry*> *dups = hashTable[i].duplicates; 00504 if (dups) 00505 { 00506 hashTable[i].duplicate_offset = str.device()->pos(); 00507 00508 /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2") .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count()); 00509 */ 00510 for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup) 00511 { 00512 const qint32 offset = (*dup)->payload->offset(); 00513 if (!offset) { 00514 const QString storageId = (*dup)->payload->storageId(); 00515 kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data(); 00516 if ((*dup)->payload->isType(KST_KService)) { 00517 KService::Ptr service = KService::Ptr::staticCast((*dup)->payload); 00518 kDebug() << service->storageId() << service->entryPath(); 00519 } 00520 // save() must have been called on the entry 00521 Q_ASSERT_X( offset, "KSycocaDict::save", 00522 QByteArray("entry offset is 0, save() was not called on ") 00523 + (*dup)->payload->storageId().toLatin1() 00524 + " entryPath=" 00525 + (*dup)->payload->entryPath().toLatin1() 00526 ); 00527 } 00528 str << offset ; // Positive ID 00529 str << (*dup)->keyStr; // Key (QString) 00530 } 00531 str << (qint32) 0; // End of list marker (0) 00532 } 00533 } 00534 //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16); 00535 } 00536 00537 //kDebug(7011) << "Cleaning up hash table."; 00538 for(uint i=0; i < d->hashTableSize; i++) 00539 { 00540 delete hashTable[i].duplicates; 00541 } 00542 delete [] hashTable; 00543 } 00544 00545 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const 00546 { 00547 if ( !stream || !offset ) 00548 { 00549 kError() << "No ksycoca4 database available!" << endl; 00550 return 0; 00551 } 00552 00553 if (hashTableSize == 0) 00554 return 0; // Unlikely to find anything :-] 00555 00556 // Read hash-table data 00557 const uint hash = hashKey(key) % hashTableSize; 00558 //kDebug(7011) << "hash is" << hash; 00559 00560 const qint32 off = offset+sizeof(qint32)*hash; 00561 //kDebug(7011) << QString("off is %1").arg(off,8,16); 00562 stream->device()->seek( off ); 00563 00564 qint32 retOffset; 00565 (*stream) >> retOffset; 00566 return retOffset; 00567 }
KDE 4.6 API Reference