• Skip to content
  • Skip to link menu
KDE 4.6 API Reference
  • KDE API Reference
  • kdelibs
  • KDE Home
  • Contact Us
 

KDECore

ksycocadict.cpp

Go to the documentation of this file.
00001 /*  This file is part of the KDE libraries
00002  *  Copyright (C) 1999 Waldo Bastian <bastian@kde.org>
00003  *
00004  *  This library is free software; you can redistribute it and/or
00005  *  modify it under the terms of the GNU Library General Public
00006  *  License version 2 as published by the Free Software Foundation;
00007  *
00008  *  This library is distributed in the hope that it will be useful,
00009  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00010  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00011  *  Library General Public License for more details.
00012  *
00013  *  You should have received a copy of the GNU Library General Public License
00014  *  along with this library; see the file COPYING.LIB.  If not, write to
00015  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00016  *  Boston, MA 02110-1301, USA.
00017  **/
00018 
00019 #include "ksycocadict_p.h"
00020 #include <kservice.h>
00021 #include "ksycocaentry.h"
00022 #include "ksycoca.h"
00023 #include "kdebug.h"
00024 
00025 #include <QtCore/QBitArray>
00026 
00027 namespace
00028 {
00029 struct string_entry {
00030     string_entry(const QString& _key, const KSycocaEntry::Ptr& _payload)
00031       : hash(0), length(_key.length()), keyStr(_key), key(keyStr.unicode()), payload(_payload)
00032     {}
00033     uint hash;
00034     const int length;
00035     const QString keyStr;
00036     const QChar * const key; // always points to keyStr.unicode(); just an optimization
00037     const KSycocaEntry::Ptr payload;
00038 };
00039 }
00040 
00041 class KSycocaDictStringList : public QList<string_entry*>
00042 {
00043 public:
00044    ~KSycocaDictStringList() {
00045        qDeleteAll(*this);
00046    }
00047 };
00048 
00049 class KSycocaDict::Private
00050 {
00051 public:
00052     Private()
00053         : stringlist( 0 ),
00054           stream( 0 ),
00055           offset( 0 )
00056     {
00057     }
00058 
00059     ~Private()
00060     {
00061         delete stringlist;
00062     }
00063 
00064     // Helper for find_string and findMultiString
00065     qint32 offsetForKey(const QString& key) const;
00066 
00067     // Calculate hash - can be used during loading and during saving.
00068     quint32 hashKey(const QString & key) const;
00069 
00070     KSycocaDictStringList *stringlist;
00071     QDataStream *stream;
00072     qint64 offset;
00073     quint32 hashTableSize;
00074     QList<qint32> hashList;
00075 };
00076 
00077 KSycocaDict::KSycocaDict()
00078   : d( new Private )
00079 {
00080 }
00081 
00082 KSycocaDict::KSycocaDict(QDataStream *str, int offset)
00083   : d( new Private )
00084 {
00085    d->stream = str;
00086    d->offset = offset;
00087 
00088    quint32 test1, test2;
00089    str->device()->seek(offset);
00090    (*str) >> test1 >> test2;
00091    if ((test1 > 0x000fffff) || (test2 > 1024))
00092    {
00093        KSycoca::flagError();
00094        d->hashTableSize = 0;
00095        d->offset = 0;
00096        return;
00097    }
00098 
00099    str->device()->seek(offset);
00100    (*str) >> d->hashTableSize;
00101    (*str) >> d->hashList;
00102    d->offset = str->device()->pos(); // Start of hashtable
00103 }
00104 
00105 KSycocaDict::~KSycocaDict()
00106 {
00107    delete d;
00108 }
00109 
00110 void
00111 KSycocaDict::add(const QString &key, const KSycocaEntry::Ptr& payload)
00112 {
00113    if (key.isEmpty()) return; // Not allowed (should never happen)
00114    if (!payload) return; // Not allowed!
00115    if (!d->stringlist)
00116    {
00117        d->stringlist = new KSycocaDictStringList;
00118    }
00119 
00120    string_entry *entry = new string_entry(key, payload);
00121    d->stringlist->append(entry);
00122 }
00123 
00124 void
00125 KSycocaDict::remove(const QString &key)
00126 {
00127     if (!d || !d->stringlist) {
00128         return;
00129     }
00130 
00131    bool found = false;
00132    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00133       string_entry* entry = *it;
00134       if (entry->keyStr == key) {
00135          d->stringlist->erase(it);
00136          delete entry;
00137          found = true;
00138          break;
00139       }
00140    }
00141    if (!found) {
00142        kWarning(7011) << "key not found:" << key;
00143    }
00144 }
00145 
00146 int KSycocaDict::find_string(const QString &key ) const
00147 {
00148     Q_ASSERT(d);
00149 
00150     //kDebug(7011) << QString("KSycocaDict::find_string(%1)").arg(key);
00151     qint32 offset = d->offsetForKey(key);
00152 
00153     //kDebug(7011) << QString("offset is %1").arg(offset,8,16);
00154     if (offset == 0)
00155         return 0;
00156 
00157     if (offset > 0)
00158         return offset; // Positive ID
00159 
00160     // Lookup duplicate list.
00161     offset = -offset;
00162 
00163     d->stream->device()->seek(offset);
00164     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00165 
00166    while(true)
00167    {
00168        (*d->stream) >> offset;
00169        if (offset == 0) break;
00170        QString dupkey;
00171        (*d->stream) >> dupkey;
00172        //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00173        if (dupkey == key) return offset;
00174    }
00175    //kWarning(7011) << "Not found!";
00176 
00177    return 0;
00178 }
00179 
00180 
00181 QList<int> KSycocaDict::findMultiString(const QString &key ) const
00182 {
00183     qint32 offset = d->offsetForKey(key);
00184     QList<int> offsetList;
00185     if (offset == 0)
00186         return offsetList;
00187 
00188     if (offset > 0) { // Positive ID: one entry found
00189         offsetList.append(offset);
00190         return offsetList;
00191     }
00192 
00193     // Lookup duplicate list.
00194     offset = -offset;
00195 
00196     d->stream->device()->seek(offset);
00197     //kDebug(7011) << QString("Looking up duplicate list at %1").arg(offset,8,16);
00198 
00199     while(true)
00200     {
00201         (*d->stream) >> offset;
00202         if (offset == 0) break;
00203         QString dupkey;
00204         (*d->stream) >> dupkey;
00205         //kDebug(7011) << QString(">> %1 %2").arg(offset,8,16).arg(dupkey);
00206         if (dupkey == key)
00207             offsetList.append(offset);
00208     }
00209     return offsetList;
00210 }
00211 
00212 uint KSycocaDict::count() const
00213 {
00214    if ( !d || !d->stringlist ) return 0;
00215 
00216    return d->stringlist->count();
00217 }
00218 
00219 void
00220 KSycocaDict::clear()
00221 {
00222    delete d;
00223    d = 0;
00224 }
00225 
00226 uint KSycocaDict::Private::hashKey( const QString &key) const
00227 {
00228    int l = key.length();
00229    register uint h = 0;
00230 
00231    for(int i = 0; i < hashList.count(); i++)
00232    {
00233       int pos = hashList[i];
00234       if (pos < 0) {
00235          pos = -pos-1;
00236          if (pos < l)
00237             h = ((h * 13) + (key[l-pos].cell() % 29)) & 0x3ffffff;
00238       } else {
00239          pos = pos-1;
00240          if (pos < l)
00241             h = ((h * 13) + (key[pos].cell() % 29)) & 0x3ffffff;
00242       }
00243    }
00244    return h;
00245 }
00246 
00247 // If we have the strings
00248 //    hello
00249 //    world
00250 //    kde
00251 // Then we end up with
00252 //    ABCDE
00253 // where A = diversity of 'h' + 'w' + 'k' etc.
00254 // Also, diversity(-2) == 'l'+'l'+'d' (second character from the end)
00255 
00256 // The hasList is used for hashing:
00257 //  hashList = (-2, 1, 3) means that the hash key comes from
00258 //  the 2nd character from the right, then the 1st from the left, then the 3rd from the left.
00259 
00260 // Calculate the diversity of the strings at position 'pos'
00261 // NOTE: this code is slow, it takes 30% of the _overall_ `kbuildsycoca4 --noincremental` running time
00262 static int
00263 calcDiversity(KSycocaDictStringList *stringlist, int inPos, uint sz)
00264 {
00265     if (inPos == 0) return 0;
00266     QBitArray matrix(sz);
00267     int pos;
00268 
00269     //static const int s_maxItems = 50;
00270     //int numItem = 0;
00271 
00272     if (inPos < 0) {
00273         pos = -inPos-1;
00274         for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it)
00275         {
00276             string_entry* entry = *it;
00277             register int l = entry->length;
00278             if (pos < l && pos != 0) {
00279                 register uint hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3ffffff;
00280                 matrix.setBit( hash % sz, true );
00281             }
00282             //if (++numItem == s_maxItems)
00283             //    break;
00284         }
00285     } else {
00286         pos = inPos-1;
00287         for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it)
00288         {
00289             string_entry* entry = *it;
00290             if (pos < entry->length) {
00291                 register uint hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3ffffff;
00292                 matrix.setBit( hash % sz, true );
00293             }
00294             //if (++numItem == s_maxItems)
00295             //    break;
00296         }
00297     }
00298     int diversity = 0;
00299     for (uint i=0;i< sz;++i)
00300         if (matrix.testBit(i))
00301             ++diversity;
00302 
00303     //kDebug() << "inPos=" << inPos << "pos=" << pos << "diversity=" << diversity;
00304 
00305     return diversity;
00306 }
00307 
00308 //
00309 // Add the diversity of the strings at position 'pos'
00310 static void
00311 addDiversity(KSycocaDictStringList *stringlist, int pos)
00312 {
00313    if (pos == 0) return;
00314    if (pos < 0) {
00315       pos = -pos-1;
00316       for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it)
00317       {
00318          string_entry* entry = *it;
00319          register int l = entry->length;
00320          if (pos < l)
00321             entry->hash = ((entry->hash * 13) + (entry->key[l-pos].cell() % 29)) & 0x3fffffff;
00322       }
00323    } else {
00324       pos = pos - 1;
00325       for(KSycocaDictStringList::const_iterator it = stringlist->constBegin(); it != stringlist->constEnd(); ++it)
00326       {
00327          string_entry* entry = *it;
00328          if (pos < entry->length)
00329             entry->hash = ((entry->hash * 13) + (entry->key[pos].cell() % 29)) & 0x3fffffff;
00330       }
00331    }
00332 }
00333 
00334 
00335 void
00336 KSycocaDict::save(QDataStream &str)
00337 {
00338    if (count() == 0)
00339    {
00340       d->hashTableSize = 0;
00341       d->hashList.clear();
00342       str << d->hashTableSize;
00343       str << d->hashList;
00344       return;
00345    }
00346 
00347    d->offset = str.device()->pos();
00348 
00349    //kDebug(7011) << "KSycocaDict:" << count() << "entries.";
00350 
00351    //kDebug(7011) << "Calculating hash keys..";
00352 
00353    int maxLength = 0;
00354    //kDebug(7011) << "Finding maximum string length";
00355    for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
00356    {
00357       string_entry* entry = *it;
00358       entry->hash = 0;
00359       if (entry->length > maxLength)
00360          maxLength = entry->length;
00361    }
00362 
00363    //kDebug(7011) << "Max string length=" << maxLength << "existing hashList=" << d->hashList;
00364 
00365    // use "almost prime" number for sz (to calculate diversity) and later
00366    // for the table size of big tables
00367    // int sz = d->stringlist->count()*5-1;
00368    register unsigned int sz = count()*4 + 1;
00369    while(!(((sz % 3) && (sz % 5) && (sz % 7) && (sz % 11) && (sz % 13))))
00370       sz+=2;
00371 
00372    d->hashList.clear();
00373 
00374    // Times (with warm caches, i.e. after multiple runs)
00375    // kbuildsycoca4 --noincremental  2.83s user 0.20s system 95% cpu 3.187 total
00376    // kbuildsycoca4 --noincremental  2.74s user 0.25s system 93% cpu 3.205 total
00377    // unittest: 0.50-60 msec per iteration / 0.40-50 msec per iteration
00378 
00379    // Now that MimeTypes are not parsed anymore:
00380    // kbuildsycoca4 --noincremental  2.18s user 0.30s system 91% cpu 2.719 total
00381    // kbuildsycoca4 --noincremental  2.07s user 0.34s system 89% cpu 2.681 total
00382 
00383    // If I enabled s_maxItems = 50, it goes down to
00384    // but I don't know if that's a good idea.
00385    // kbuildsycoca4 --noincremental  1.73s user 0.31s system 85% cpu 2.397 total
00386    // kbuildsycoca4 --noincremental  1.84s user 0.29s system 95% cpu 2.230 total
00387 
00388    // try to limit diversity scan by "predicting" positions
00389    // with high diversity
00390    QVector<int> oldvec(maxLength*2+1);
00391    oldvec.fill(0);
00392    int mindiv=0;
00393    int lastDiv = 0;
00394 
00395    while(true)
00396    {
00397       int divsum=0,divnum=0;
00398 
00399       int maxDiv = 0;
00400       int maxPos = 0;
00401       for (int pos = -maxLength; pos <= maxLength; ++pos) {
00402          // cut off
00403          if (oldvec[pos+maxLength] < mindiv) { oldvec[pos+maxLength]=0; continue; }
00404 
00405          const int diversity = calcDiversity(d->stringlist, pos, sz);
00406          if (diversity > maxDiv) {
00407             maxDiv = diversity;
00408             maxPos = pos;
00409          }
00410          oldvec[pos + maxLength] = diversity;
00411          divsum += diversity;
00412          ++divnum;
00413       }
00414       // arbitrary cut-off value 3/4 of average seems to work
00415       if (divnum)
00416          mindiv=(3*divsum)/(4*divnum);
00417 
00418       if (maxDiv <= lastDiv)
00419          break;
00420       //kDebug() << "Max Div=" << maxDiv << "at pos" << maxPos;
00421       lastDiv = maxDiv;
00422       addDiversity(d->stringlist, maxPos);
00423       d->hashList.append(maxPos);
00424    }
00425 
00426 
00427    for(KSycocaDictStringList::Iterator it = d->stringlist->begin(); it != d->stringlist->end(); ++it) {
00428       (*it)->hash = d->hashKey((*it)->keyStr);
00429    }
00430 // fprintf(stderr, "Calculating minimum table size..\n");
00431 
00432    d->hashTableSize = sz;
00433 
00434    //kDebug() << "hashTableSize=" << sz << "hashList=" << d->hashList << "oldvec=" << oldvec;
00435 
00436    struct hashtable_entry {
00437       string_entry *entry;
00438       QList<string_entry*>* duplicates;
00439       qint64 duplicate_offset;
00440    };
00441 
00442    hashtable_entry *hashTable = new hashtable_entry[ sz ];
00443 
00444    //kDebug(7011) << "Clearing hashtable...";
00445    for (unsigned int i=0; i < sz; i++)
00446    {
00447       hashTable[i].entry = 0;
00448       hashTable[i].duplicates = 0;
00449    }
00450 
00451    //kDebug(7011) << "Filling hashtable...";
00452    for(KSycocaDictStringList::const_iterator it = d->stringlist->constBegin(); it != d->stringlist->constEnd(); ++it)
00453    {
00454       string_entry* entry = *it;
00455       //kDebug(7011) << "entry keyStr=" << entry->keyStr << entry->payload.data() << entry->payload->entryPath();
00456       int hash = entry->hash % sz;
00457       if (!hashTable[hash].entry)
00458       { // First entry
00459          hashTable[hash].entry = entry;
00460       }
00461       else
00462       {
00463          if (!hashTable[hash].duplicates)
00464          { // Second entry, build duplicate list.
00465             hashTable[hash].duplicates = new QList<string_entry*>;
00466             hashTable[hash].duplicates->append(hashTable[hash].entry);
00467             hashTable[hash].duplicate_offset = 0;
00468          }
00469          hashTable[hash].duplicates->append(entry);
00470       }
00471    }
00472 
00473    str << d->hashTableSize;
00474    str << d->hashList;
00475 
00476    d->offset = str.device()->pos(); // d->offset points to start of hashTable
00477    //kDebug(7011) << QString("Start of Hash Table, offset = %1").arg(d->offset,8,16);
00478 
00479    // Write the hashtable + the duplicates twice.
00480    // The duplicates are after the normal hashtable, but the offset of each
00481    // duplicate entry is written into the normal hashtable.
00482    for(int pass = 1; pass <= 2; pass++)
00483    {
00484       str.device()->seek(d->offset);
00485       //kDebug(7011) << QString("Writing hash table (pass #%1)").arg(pass);
00486       for(uint i=0; i < d->hashTableSize; i++)
00487       {
00488          qint32 tmpid;
00489          if (!hashTable[i].entry)
00490             tmpid = (qint32) 0;
00491          else if (!hashTable[i].duplicates)
00492             tmpid = (qint32) hashTable[i].entry->payload->offset(); // Positive ID
00493          else
00494             tmpid = (qint32) -hashTable[i].duplicate_offset; // Negative ID
00495          str << tmpid;
00496          //kDebug(7011) << QString("Hash table : %1").arg(tmpid,8,16);
00497       }
00498       //kDebug(7011) << QString("End of Hash Table, offset = %1").arg(str.device()->at(),8,16);
00499 
00500       //kDebug(7011) << QString("Writing duplicate lists (pass #%1)").arg(pass);
00501       for(uint i=0; i < d->hashTableSize; i++)
00502       {
00503          const QList<string_entry*> *dups = hashTable[i].duplicates;
00504          if (dups)
00505          {
00506             hashTable[i].duplicate_offset = str.device()->pos();
00507 
00508             /*kDebug(7011) << QString("Duplicate lists: Offset = %1 list_size = %2")                           .arg(hashTable[i].duplicate_offset,8,16).arg(dups->count());
00509 */
00510         for(QList<string_entry*>::ConstIterator dup = dups->begin(); dup != dups->end(); ++dup)
00511             {
00512                const qint32 offset = (*dup)->payload->offset();
00513                if (!offset) {
00514                    const QString storageId = (*dup)->payload->storageId();
00515                    kDebug() << "about to assert! dict=" << this << "storageId=" << storageId << (*dup)->payload.data();
00516                    if ((*dup)->payload->isType(KST_KService)) {
00517                        KService::Ptr service = KService::Ptr::staticCast((*dup)->payload);
00518                        kDebug() << service->storageId() << service->entryPath();
00519                    }
00520                    // save() must have been called on the entry
00521                    Q_ASSERT_X( offset, "KSycocaDict::save",
00522                                QByteArray("entry offset is 0, save() was not called on ")
00523                                + (*dup)->payload->storageId().toLatin1()
00524                                + " entryPath="
00525                                + (*dup)->payload->entryPath().toLatin1()
00526                        );
00527                }
00528                str << offset ;                       // Positive ID
00529                str << (*dup)->keyStr;                // Key (QString)
00530             }
00531             str << (qint32) 0;               // End of list marker (0)
00532          }
00533       }
00534       //kDebug(7011) << QString("End of Dict, offset = %1").arg(str.device()->at(),8,16);
00535    }
00536 
00537    //kDebug(7011) << "Cleaning up hash table.";
00538    for(uint i=0; i < d->hashTableSize; i++)
00539    {
00540       delete hashTable[i].duplicates;
00541    }
00542    delete [] hashTable;
00543 }
00544 
00545 qint32 KSycocaDict::Private::offsetForKey(const QString& key) const
00546 {
00547    if ( !stream || !offset )
00548    {
00549       kError() << "No ksycoca4 database available!" << endl;
00550       return 0;
00551    }
00552 
00553    if (hashTableSize == 0)
00554       return 0; // Unlikely to find anything :-]
00555 
00556    // Read hash-table data
00557    const uint hash = hashKey(key) % hashTableSize;
00558    //kDebug(7011) << "hash is" << hash;
00559 
00560    const qint32 off = offset+sizeof(qint32)*hash;
00561    //kDebug(7011) << QString("off is %1").arg(off,8,16);
00562    stream->device()->seek( off );
00563 
00564    qint32 retOffset;
00565    (*stream) >> retOffset;
00566    return retOffset;
00567 }

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.7.3
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal