• Skip to content
  • Skip to link menu
KDE 4.7 API Reference
  • KDE API Reference
  • kdelibs
  • KDE Home
  • Contact Us
 

KDECore

ksycocadict.cpp
Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries
00002  *  Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
00003  *
00004  *  This library is free software; you can redistribute it and/or
00005  *  modify it under the terms of the GNU Library General Public
00006  *  License version 2 as published by the Free Software Foundation;
00007  *
00008  *  This library is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011  *  Library General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU Library General Public License
00014  *  along with this library; see the file COPYING.LIB.  If not, write to
00015  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00016  *  Boston, MA 02110-1301, USA.
00017  **/
00018 
00019 #include "ksycocadict_p.h"
00020 #include <kservice.h>
00021 #include "ksycocaentry.h"
00022 #include "ksycoca.h"
00023 #include "kdebug.h"
00024 
00025 #include <QtCore/QBitArray>
00026 
00027 namespace
00028 {
00029 struct string_entry {
00030     string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
00031       : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
00032     {}
00033     uint hash;
00034     const int length;
00035     const QString keyStr;
00036     const QChar * const key; // always points to keyStr.unicode(); just an optimization
00037     const KSycocaEntry::Ptr payload;
00038 };
00039 }
00040 
00041 class KSycocaDictStringList : public QList<string_entry*>
00042 {
00043 public:
00044    ~KSycocaDictStringList() {
00045        qDeleteAll(*this);
00046    }
00047 };
00048 
00049 class KSycocaDict::Private
00050 {
00051 public:
00052     Private()
00053         : stringlist( 0 ),
00054           stream( 0 ),
00055           offset( 0 )
00056     {
00057     }
00058 
00059     ~Private()
00060     {
00061         delete stringlist;
00062     }
00063 
00064     // Helper for find_string and findMultiString
00065     qint32 offsetForKey(const QString& key) const;
00066 
00067     // Calculate hash - can be used during loading and during saving.
00068     quint32 hashKey(const QString & key) const;
00069 
00070     KSycocaDictStringList *stringlist;
00071     QDataStream *stream;
00072     qint64 offset;
00073     quint32 hashTableSize;
00074     QList<qint32> hashList;
00075 };
00076 
00077 KSycocaDict::KSycocaDict()
00078   : d( new Private )
00079 {
00080 }
00081 
00082 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00083   : d( new Private )
00084 {
00085    d->stream = str;
00086    d->offset = offset;
00087 
00088    quint32 test1, test2;
00089    str->device()->seek(offset);
00090    (*str) >> test1 >> test2;
00091    if ((test1 > 0x000fffff) || (test2 > 1024))
00092    {
00093        KSycoca::flagError();
00094        d->hashTableSize = 0;
00095        d->offset = 0;
00096        return;
00097    }
00098 
00099    str->device()->seek(offset);
00100    (*str) >> d->hashTableSize;
00101    (*str) >> d->hashList;
00102    d->offset = str->device()->pos(); // Start of hashtable
00103 }
00104 
00105 KSycocaDict::~KSycocaDict()
00106 {
00107    delete d;
00108 }
00109 
00110 void
00111 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
00112 {
00113    if (key.isEmpty()) return; // Not allowed (should never happen)
00114    if (!payload) return; // Not allowed!
00115    if (!d->stringlist)
00116    {
00117        d->stringlist = new KSycocaDictStringList;
00118    }
00119 
00120    string_entry *entry = new string_entry(key, payload);
00121    d->stringlist->append(entry);
00122 }
00123 
00124 void
00125 KSycocaDict::remove(const QString &key)
00126 {
00127     if (!d || !d->stringlist) {
00128         return;
00129     }
00130 
00131    bool found = false;
00132    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00133       string_entry* entry = *it;
00134       if (entry->keyStr == key) {
00135          d->stringlist->erase(it);
00136          delete entry;
00137          found = true;
00138          break;
00139       }
00140    }
00141    if (!found) {
00142        kWarning(7011) << "key not found:" << key;
00143    }
00144 }
00145 
00146 int KSycocaDict::find_string(const QString &key ) const
00147 {
00148     Q_ASSERT(d);
00149 
00150     //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key);
00151     qint32 offset = d->offsetForKey(key);
00152 
00153     //kDebug(7011) << QString("offset is %1").arg(offset,8,16);
00154     if (offset == 0)
00155         return 0;
00156 
00157     if (offset > 0)
00158         return offset; // Positive ID
00159 
00160     // Lookup duplicate list.
00161     offset = -offset;
00162 
00163     d->stream->device()->seek(offset);
00164     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00165 
00166    while(true)
00167    {
00168        (*d->stream) >> offset;
00169        if (offset == 0) break;
00170        QString dupkey;
00171        (*d->stream) >> dupkey;
00172        //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00173        if (dupkey == key) return offset;
00174    }
00175    //kWarning(7011) << "Not found!";
00176 
00177    return 0;
00178 }
00179 
00180 
00181 QList<int> KSycocaDict::findMultiString(const QString &key ) const
00182 {
00183     qint32 offset = d->offsetForKey(key);
00184     QList<int> offsetList;
00185     if (offset == 0)
00186         return offsetList;
00187 
00188     if (offset > 0) { // Positive ID: one entry found
00189         offsetList.append(offset);
00190         return offsetList;
00191     }
00192 
00193     // Lookup duplicate list.
00194     offset = -offset;
00195 
00196     d->stream->device()->seek(offset);
00197     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00198 
00199     while(true)
00200     {
00201         (*d->stream) >> offset;
00202         if (offset == 0) break;
00203         QString dupkey;
00204         (*d->stream) >> dupkey;
00205         //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00206         if (dupkey == key)
00207             offsetList.append(offset);
00208     }
00209     return offsetList;
00210 }
00211 
00212 uint KSycocaDict::count() const
00213 {
00214    if ( !d || !d->stringlist ) return 0;
00215 
00216    return d->stringlist->count();
00217 }
00218 
00219 void
00220 KSycocaDict::clear()
00221 {
00222    delete d;
00223    d = 0;
00224 }
00225 
00226 uint KSycocaDict::Private::hashKey( const QString &key) const
00227 {
00228    int len = key.length();
00229    uint h = 0;
00230 
00231    for(int i = 0; i < hashList.count(); i++)
00232    {
00233       int pos = hashList[i];
00234       if (pos < 0) {
00235          pos = -pos-1;
00236          if (pos < len)
00237             h = ((h * 13) + (key[len-pos].cell() % 29)) & 0x3ffffff;
00238       } else {
00239          pos = pos-1;
00240          if (pos < len)
00241             h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00242       }
00243    }
00244    return h;
00245 }
00246 
00247 // If we have the strings
00248 //    hello
00249 //    world
00250 //    kde
00251 // Then we end up with
00252 //    ABCDE
00253 // where A = diversity of 'h' + 'w' + 'k' etc.
00254 // Also, diversity(-2) == 'l'+'l'+'d' (second character from the end)
00255 
00256 // The hasList is used for hashing:
00257 //  hashList = (-2, 1, 3) means that the hash key comes from
00258 //  the 2nd character from the right, then the 1st from the left, then the 3rd from the left.
00259 
00260 // Calculate the diversity of the strings at position 'pos'
00261 // NOTE: this code is slow, it takes 12% of the _overall_ `kbuildsycoca4 --noincremental` running time
00262 static int
00263 calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz)
00264 {
00265     if (inPos == 0) return 0;
00266     QBitArray matrix(sz);
00267     int pos;
00268 
00269     //static const int s_maxItems = 50;
00270     //int numItem = 0;
00271 
00272     if (inPos < 0) {
00273         pos = -inPos-1;
00274         for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
00275         {
00276             string_entry* entry = *it;
00277             int len = entry->length;
00278             if (pos < len && pos != 0) {
00279                 uint hash = ((entry->hash * 13) + (entry->key[len-pos].cell() % 29)) & 0x3ffffff;
00280                 matrix.setBit( hash % sz, true );
00281             }
00282             //if (++numItem == s_maxItems)
00283             //    break;
00284         }
00285     } else {
00286         pos = inPos-1;
00287         for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(), end = stringlist->constEnd(); it != end; ++it)
00288         {
00289             string_entry* entry = *it;
00290             if (pos < entry->length) {
00291                 uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00292                 matrix.setBit( hash % sz, true );
00293             }
00294             //if (++numItem == s_maxItems)
00295             //    break;
00296         }
00297     }
00298     return matrix.count(true);
00299 }
00300 
00301 //
00302 // Add the diversity of the strings at position 'pos'
00303 static void
00304 addDiversity(KSycocaDictStringList *stringlist, int pos)
00305 {
00306    if (pos == 0) return;
00307    if (pos < 0) {
00308       pos = -pos-1;
00309       for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it)
00310       {
00311          string_entry* entry = *it;
00312          register int l = entry->length;
00313          if (pos < l)
00314             entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
00315       }
00316    } else {
00317       pos = pos - 1;
00318       for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it)
00319       {
00320          string_entry* entry = *it;
00321          if (pos < entry->length)
00322             entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00323       }
00324    }
00325 }
00326 
00327 
00328 void
00329 KSycocaDict::save(QDataStream &str)
00330 {
00331    if (count() == 0)
00332    {
00333       d->hashTableSize = 0;
00334       d->hashList.clear();
00335       str << d->hashTableSize;
00336       str << d->hashList;
00337       return;
00338    }
00339 
00340    d->offset = str.device()->pos();
00341 
00342    //kDebug(7011) << "KSycocaDict:" << count() << "entries.";
00343 
00344    //kDebug(7011) << "Calculating hash keys..";
00345 
00346    int maxLength = 0;
00347    //kDebug(7011) << "Finding maximum string length";
00348    for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
00349    {
00350       string_entry* entry = *it;
00351       entry->hash = 0;
00352       if (entry->length > maxLength)
00353          maxLength = entry->length;
00354    }
00355 
00356    //kDebug(7011) << "Max string length=" << maxLength << "existing hashList=" << d->hashList;
00357 
00358    // use "almost prime" number for sz (to calculate diversity) and later
00359    // for the table size of big tables
00360    // int sz = d->stringlist->count()*5-1;
00361    register unsigned int sz = count()*4 + 1;
00362    while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
00363       sz+=2;
00364 
00365    d->hashList.clear();
00366 
00367    // Times (with warm caches, i.e. after multiple runs)
00368    // kbuildsycoca4 --noincremental  2.83s user 0.20s system 95% cpu 3.187 total
00369    // kbuildsycoca4 --noincremental  2.74s user 0.25s system 93% cpu 3.205 total
00370    // unittest: 0.50-60 msec per iteration / 0.40-50 msec per iteration
00371 
00372    // Now that MimeTypes are not parsed anymore:
00373    // kbuildsycoca4 --noincremental  2.18s user 0.30s system 91% cpu 2.719 total
00374    // kbuildsycoca4 --noincremental  2.07s user 0.34s system 89% cpu 2.681 total
00375 
00376    // If I enabled s_maxItems = 50, it goes down to
00377    // but I don't know if that's a good idea.
00378    // kbuildsycoca4 --noincremental  1.73s user 0.31s system 85% cpu 2.397 total
00379    // kbuildsycoca4 --noincremental  1.84s user 0.29s system 95% cpu 2.230 total
00380 
00381    // try to limit diversity scan by "predicting" positions
00382    // with high diversity
00383    QVector<int> oldvec(maxLength*2+1);
00384    oldvec.fill(0);
00385    int mindiv=0;
00386    int lastDiv = 0;
00387 
00388    while(true)
00389    {
00390       int divsum=0,divnum=0;
00391 
00392       int maxDiv = 0;
00393       int maxPos = 0;
00394       for (int pos = -maxLength; pos <= maxLength; ++pos) {
00395          // cut off
00396          if (oldvec[pos+maxLength] < mindiv) { oldvec[pos+maxLength]=0; continue; }
00397 
00398          const int diversity = calcDiversity(d->stringlist, pos, sz);
00399          if (diversity > maxDiv) {
00400             maxDiv = diversity;
00401             maxPos = pos;
00402          }
00403          oldvec[pos + maxLength] = diversity;
00404          divsum += diversity;
00405          ++divnum;
00406       }
00407       // arbitrary cut-off value 3/4 of average seems to work
00408       if (divnum)
00409          mindiv=(3*divsum)/(4*divnum);
00410 
00411       if (maxDiv <= lastDiv)
00412          break;
00413       //kDebug() << "Max Div=" << maxDiv << "at pos" << maxPos;
00414       lastDiv = maxDiv;
00415       addDiversity(d->stringlist, maxPos);
00416       d->hashList.append(maxPos);
00417    }
00418 
00419 
00420    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00421       (*it)->hash = d->hashKey((*it)->keyStr);
00422    }
00423 // fprintf(stderr, "Calculating minimum table size..\n");
00424 
00425    d->hashTableSize = sz;
00426 
00427    //kDebug() << "hashTableSize=" << sz << "hashList=" << d->hashList << "oldvec=" << oldvec;
00428 
00429    struct hashtable_entry {
00430       string_entry *entry;
00431       QList<string_entry*>* duplicates;
00432       qint64 duplicate_offset;
00433    };
00434 
00435    hashtable_entry *hashTable = new hashtable_entry[ sz ];
00436 
00437    //kDebug(7011) << "Clearing hashtable...";
00438    for (unsigned int i=0; i < sz; i++)
00439    {
00440       hashTable[i].entry = 0;
00441       hashTable[i].duplicates = 0;
00442    }
00443 
00444    //kDebug(7011) << "Filling hashtable...";
00445    for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
00446    {
00447       string_entry* entry = *it;
00448       //kDebug(7011) << "entry keyStr=" << entry->keyStr << entry->payload.data() << entry->payload->entryPath();
00449       int hash = entry->hash % sz;
00450       if (!hashTable[hash].entry)
00451       { // First entry
00452          hashTable[hash].entry = entry;
00453       }
00454       else
00455       {
00456          if (!hashTable[hash].duplicates)
00457          { // Second entry, build duplicate list.
00458             hashTable[hash].duplicates = new QList<string_entry*>;
00459             hashTable[hash].duplicates->append(hashTable[hash].entry);
00460             hashTable[hash].duplicate_offset = 0;
00461          }
00462          hashTable[hash].duplicates->append(entry);
00463       }
00464    }
00465 
00466    str << d->hashTableSize;
00467    str << d->hashList;
00468 
00469    d->offset = str.device()->pos(); // d->offset points to start of hashTable
00470    //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16);
00471 
00472    // Write the hashtable + the duplicates twice.
00473    // The duplicates are after the normal hashtable, but the offset of each
00474    // duplicate entry is written into the normal hashtable.
00475    for(int pass = 1; pass <= 2; pass++)
00476    {
00477       str.device()->seek(d->offset);
00478       //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass);
00479       for(uint i=0; i < d->hashTableSize; i++)
00480       {
00481          qint32 tmpid;
00482          if (!hashTable[i].entry)
00483             tmpid = (qint32) 0;
00484          else if (!hashTable[i].duplicates)
00485             tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID
00486          else
00487             tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID
00488          str << tmpid;
00489          //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16);
00490       }
00491       //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16);
00492 
00493       //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass);
00494       for(uint i=0; i < d->hashTableSize; i++)
00495       {
00496          const QList<string_entry*> *dups = hashTable[i].duplicates;
00497          if (dups)
00498          {
00499             hashTable[i].duplicate_offset = str.device()->pos();
00500 
00501             /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2")                           .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count());
00502 */
00503         for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
00504             {
00505                const qint32 offset = (*dup)->payload->offset();
00506                if (!offset) {
00507                    const QString storageId = (*dup)->payload->storageId();
00508                    kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data();
00509                    if ((*dup)->payload->isType(KST_KService)) {
00510                        KService::Ptr service = KService::Ptr::staticCast((*dup)->payload);
00511                        kDebug() << service->storageId() << service->entryPath();
00512                    }
00513                    // save() must have been called on the entry
00514                    Q_ASSERT_X( offset, "KSycocaDict::save",
00515                                QByteArray("entry offset is 0, save() was not called on "
00516                                + (*dup)->payload->storageId().toLatin1()
00517                                + " entryPath="
00518                                + (*dup)->payload->entryPath().toLatin1())
00519                        );
00520                }
00521                str << offset ;                       // Positive ID
00522                str << (*dup)->keyStr;                // Key (QString)
00523             }
00524             str << (qint32) 0;               // End of list marker (0)
00525          }
00526       }
00527       //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16);
00528    }
00529 
00530    //kDebug(7011) << "Cleaning up hash table.";
00531    for(uint i=0; i < d->hashTableSize; i++)
00532    {
00533       delete hashTable[i].duplicates;
00534    }
00535    delete [] hashTable;
00536 }
00537 
00538 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
00539 {
00540    if ( !stream || !offset )
00541    {
00542       kError() << "No ksycoca4 database available!" << endl;
00543       return 0;
00544    }
00545 
00546    if (hashTableSize == 0)
00547       return 0; // Unlikely to find anything :-]
00548 
00549    // Read hash-table data
00550    const uint hash = hashKey(key) % hashTableSize;
00551    //kDebug(7011) << "hash is" << hash;
00552 
00553    const qint32 off = offset+sizeof(qint32)*hash;
00554    //kDebug(7011) << QString("off is %1").arg(off,8,16);
00555    stream->device()->seek( off );
00556 
00557    qint32 retOffset;
00558    (*stream) >> retOffset;
00559    return retOffset;
00560 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Namespace List
  • Namespace Members
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Modules
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.7.5
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal