• Skip to content
  • Skip to link menu
KDE 4.6 API Reference
  • KDE API Reference
  • kdelibs
  • KDE Home
  • Contact Us
 

KDECore

kstringhandler.cpp

Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002    Copyright (C) 1999 Ian Zepp (icszepp@islc.net)
00003    Copyright (C) 2006 by Dominic Battre <dominic@battre.de>
00004    Copyright (C) 2006 by Martin Pool <mbp@canonical.com>
00005 
00006    This library is free software; you can redistribute it and/or
00007    modify it under the terms of the GNU Library General Public
00008    License as published by the Free Software Foundation; either
00009    version 2 of the License, or (at your option) any later version.
00010 
00011    This library is distributed in the hope that it will be useful,
00012    but WITHOUT ANY WARRANTY; without even the implied warranty of
00013    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00014    Library General Public License for more details.
00015 
00016    You should have received a copy of the GNU Library General Public License
00017    along with this library; see the file COPYING.LIB.  If not, write to
00018    the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00019    Boston, MA 02110-1301, USA.
00020 */
00021 
00022 #include "kstringhandler.h"
00023 
00024 #include <stdlib.h>     // random()
00025 
00026 #include <kglobal.h>
00027 
00028 #include <QtCore/QRegExp>            // for the word ranges
00029 #include <QtCore/QCharRef>
00030 #include <QtCore/QMutableStringListIterator>
00031 
00032 
00033 
00034 //
00035 // Capitalization routines
00036 //
00037 QString KStringHandler::capwords( const QString &text )
00038 {
00039     if ( text.isEmpty() ) {
00040         return text;
00041     }
00042 
00043     const QString strippedText = text.trimmed();
00044     const QString space = QString(QLatin1Char(' '));
00045     const QStringList words = capwords(strippedText.split(space));
00046 
00047     QString result = text;
00048     result.replace(strippedText, words.join(space));
00049     return result;
00050 }
00051 
00052 QStringList KStringHandler::capwords( const QStringList &list )
00053 {
00054     QStringList tmp = list;
00055     for ( QStringList::Iterator it = tmp.begin(); it != tmp.end(); ++it ) {
00056         *it = ( *it )[ 0 ].toUpper() + ( *it ).mid( 1 );
00057     }
00058     return tmp;
00059 }
00060 
00061 
00062 QString KStringHandler::lsqueeze( const QString & str, int maxlen )
00063 {
00064   if (str.length() > maxlen) {
00065     int part = maxlen-3;
00066     return QString::fromLatin1("...") + str.right(part);
00067   }
00068   else return str;
00069 }
00070 
00071 QString KStringHandler::csqueeze( const QString & str, int maxlen )
00072 {
00073   if (str.length() > maxlen && maxlen > 3) {
00074     const int part = (maxlen-3)/2;
00075     return str.left(part) + QLatin1String("...") + str.right(part);
00076   }
00077   else return str;
00078 }
00079 
00080 QString KStringHandler::rsqueeze( const QString & str, int maxlen )
00081 {
00082   if (str.length() > maxlen) {
00083     int part = maxlen-3;
00084     return str.left(part) + QLatin1String("...");
00085   }
00086   else return str;
00087 }
00088 
00089 QStringList KStringHandler::perlSplit(const QString & sep, const QString & s, int max)
00090 {
00091   bool ignoreMax = 0 == max;
00092 
00093   QStringList l;
00094 
00095   int searchStart = 0;
00096 
00097   int tokenStart = s.indexOf(sep, searchStart);
00098 
00099   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00100   {
00101     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00102       l << s.mid(searchStart, tokenStart - searchStart);
00103 
00104     searchStart = tokenStart + sep.length();
00105     tokenStart = s.indexOf(sep, searchStart);
00106   }
00107 
00108   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00109     l << s.mid(searchStart, s.length() - searchStart);
00110 
00111   return l;
00112 }
00113 
00114 QStringList KStringHandler::perlSplit(const QChar & sep, const QString & s, int max)
00115 {
00116   bool ignoreMax = 0 == max;
00117 
00118   QStringList l;
00119 
00120   int searchStart = 0;
00121 
00122   int tokenStart = s.indexOf(sep, searchStart);
00123 
00124   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00125   {
00126     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00127       l << s.mid(searchStart, tokenStart - searchStart);
00128 
00129     searchStart = tokenStart + 1;
00130     tokenStart = s.indexOf(sep, searchStart);
00131   }
00132 
00133   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00134     l << s.mid(searchStart, s.length() - searchStart);
00135 
00136   return l;
00137 }
00138 
00139 QStringList KStringHandler::perlSplit(const QRegExp & sep, const QString & s, int max)
00140 {
00141   bool ignoreMax = 0 == max;
00142 
00143   QStringList l;
00144 
00145   int searchStart = 0;
00146   int tokenStart = sep.indexIn(s, searchStart);
00147   int len = sep.matchedLength();
00148 
00149   while (-1 != tokenStart && (ignoreMax || l.count() < max - 1))
00150   {
00151     if (!s.mid(searchStart, tokenStart - searchStart).isEmpty())
00152       l << s.mid(searchStart, tokenStart - searchStart);
00153 
00154     searchStart = tokenStart + len;
00155     tokenStart = sep.indexIn(s, searchStart);
00156     len = sep.matchedLength();
00157   }
00158 
00159   if (!s.mid(searchStart, s.length() - searchStart).isEmpty())
00160     l << s.mid(searchStart, s.length() - searchStart);
00161 
00162   return l;
00163 }
00164 
00165 QString KStringHandler::tagUrls( const QString& text )
00166 {
00167     /*static*/ QRegExp urlEx(QLatin1String("(www\\.(?!\\.)|(fish|(f|ht)tp(|s))://)[\\d\\w\\./,:_~\\?=&;#@\\-\\+\\%\\$]+[\\d\\w/]"));
00168 
00169     QString richText( text );
00170     int urlPos = 0, urlLen;
00171     while ((urlPos = urlEx.indexIn(richText, urlPos)) >= 0)
00172     {
00173         urlLen = urlEx.matchedLength();
00174         QString href = richText.mid( urlPos, urlLen );
00175         // Qt doesn't support (?<=pattern) so we do it here
00176         if((urlPos > 0) && richText[urlPos-1].isLetterOrNumber()){
00177             urlPos++;
00178             continue;
00179         }
00180         // Don't use QString::arg since %01, %20, etc could be in the string
00181         QString anchor = QString::fromLatin1("<a href=\"") + href + QLatin1String("\">") + href + QLatin1String("</a>");
00182         richText.replace( urlPos, urlLen, anchor );
00183 
00184 
00185         urlPos += anchor.length();
00186     }
00187     return richText;
00188 }
00189 
00190 QString KStringHandler::obscure( const QString &str )
00191 {
00192   QString result;
00193   const QChar *unicode = str.unicode();
00194   for ( int i = 0; i < str.length(); ++i )
00195     // yes, no typo. can't encode ' ' or '!' because
00196     // they're the unicode BOM. stupid scrambling. stupid.
00197     result += ( unicode[ i ].unicode() <= 0x21 ) ? unicode[ i ] :
00198         QChar( 0x1001F - unicode[ i ].unicode() );
00199 
00200   return result;
00201 }
00202 
00203 
00204 bool KStringHandler::isUtf8( const char *buf )
00205 {
00206   int i, n;
00207   register unsigned char c;
00208   bool gotone = false;
00209 
00210   if (!buf)
00211     return true; // whatever, just don't crash
00212 
00213 #define F 0   /* character never appears in text */
00214 #define T 1   /* character appears in plain ASCII text */
00215 #define I 2   /* character appears in ISO-8859 text */
00216 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
00217 
00218   static const unsigned char text_chars[256] = {
00219         /*                  BEL BS HT LF    FF CR    */
00220         F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
00221         /*                              ESC          */
00222         F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
00223         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
00224         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
00225         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
00226         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
00227         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
00228         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
00229         /*            NEL                            */
00230         X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
00231         X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
00232         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
00233         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
00234         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
00235         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
00236         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
00237         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
00238   };
00239 
00240   /* *ulen = 0; */
00241   for (i = 0; (c = buf[i]); ++i) {
00242     if ((c & 0x80) == 0) {        /* 0xxxxxxx is plain ASCII */
00243       /*
00244        * Even if the whole file is valid UTF-8 sequences,
00245        * still reject it if it uses weird control characters.
00246        */
00247 
00248       if (text_chars[c] != T)
00249         return false;
00250 
00251     } else if ((c & 0x40) == 0) { /* 10xxxxxx never 1st byte */
00252       return false;
00253     } else {                           /* 11xxxxxx begins UTF-8 */
00254       int following;
00255 
00256     if ((c & 0x20) == 0) {             /* 110xxxxx */
00257       following = 1;
00258     } else if ((c & 0x10) == 0) {      /* 1110xxxx */
00259       following = 2;
00260     } else if ((c & 0x08) == 0) {      /* 11110xxx */
00261       following = 3;
00262     } else if ((c & 0x04) == 0) {      /* 111110xx */
00263       following = 4;
00264     } else if ((c & 0x02) == 0) {      /* 1111110x */
00265       following = 5;
00266     } else
00267       return false;
00268 
00269       for (n = 0; n < following; ++n) {
00270         i++;
00271         if (!(c = buf[i]))
00272           goto done;
00273 
00274         if ((c & 0x80) == 0 || (c & 0x40))
00275           return false;
00276       }
00277       gotone = true;
00278     }
00279   }
00280 done:
00281   return gotone;   /* don't claim it's UTF-8 if it's all 7-bit */
00282 }
00283 
00284 #undef F
00285 #undef T
00286 #undef I
00287 #undef X
00288 
00289 QString KStringHandler::from8Bit( const char *str )
00290 {
00291   if (!str)
00292     return QString();
00293   if (!*str) {
00294     static const QString &emptyString = KGlobal::staticQString("");
00295     return emptyString;
00296   }
00297   return KStringHandler::isUtf8( str ) ?
00298              QString::fromUtf8( str ) :
00299              QString::fromLocal8Bit( str );
00300 }
00301 
00302 int KStringHandler::naturalCompare(const QString &_a, const QString &_b, Qt::CaseSensitivity caseSensitivity)
00303 {
00304     // This method chops the input a and b into pieces of
00305     // digits and non-digits (a1.05 becomes a | 1 | . | 05)
00306     // and compares these pieces of a and b to each other
00307     // (first with first, second with second, ...).
00308     //
00309     // This is based on the natural sort order code code by Martin Pool
00310     // http://sourcefrog.net/projects/natsort/
00311     // Martin Pool agreed to license this under LGPL or GPL.
00312 
00313     // FIXME: Using toLower() to implement case insensitive comparison is
00314     // sub-optimal, but is needed because we compare strings with
00315     // localeAwareCompare(), which does not know about case sensitivity.
00316     // A task has been filled for this in Qt Task Tracker with ID 205990.
00317     // http://trolltech.com/developer/task-tracker/index_html?method=entry&id=205990
00318     QString a;
00319     QString b;
00320     if (caseSensitivity == Qt::CaseSensitive) {
00321         a = _a;
00322         b = _b;
00323     } else {
00324         a = _a.toLower();
00325         b = _b.toLower();
00326     }
00327 
00328     const QChar* currA = a.unicode(); // iterator over a
00329     const QChar* currB = b.unicode(); // iterator over b
00330 
00331     if (currA == currB) {
00332         return 0;
00333     }
00334 
00335     while (!currA->isNull() && !currB->isNull()) {
00336         const QChar* begSeqA = currA; // beginning of a new character sequence of a
00337         const QChar* begSeqB = currB;
00338         if (currA->unicode() == QChar::ObjectReplacementCharacter) {
00339             return 1;
00340         }
00341 
00342         if (currB->unicode() == QChar::ObjectReplacementCharacter) {
00343             return -1;
00344         }
00345 
00346         if (currA->unicode() == QChar::ReplacementCharacter) {
00347             return 1;
00348         }
00349 
00350         if (currB->unicode() == QChar::ReplacementCharacter) {
00351             return -1;
00352         }
00353 
00354         // find sequence of characters ending at the first non-character
00355         while (!currA->isNull() && !currA->isDigit() && !currA->isPunct() && !currA->isSpace()) {
00356             ++currA;
00357         }
00358 
00359         while (!currB->isNull() && !currB->isDigit() && !currB->isPunct() && !currB->isSpace()) {
00360             ++currB;
00361         }
00362 
00363         // compare these sequences
00364         const QStringRef& subA(a.midRef(begSeqA - a.unicode(), currA - begSeqA));
00365         const QStringRef& subB(b.midRef(begSeqB - b.unicode(), currB - begSeqB));
00366         const int cmp = QStringRef::localeAwareCompare(subA, subB);
00367         if (cmp != 0) {
00368             return cmp < 0 ? -1 : +1;
00369         }
00370 
00371         if (currA->isNull() || currB->isNull()) {
00372             break;
00373         }
00374 
00375         // find sequence of characters ending at the first non-character
00376         while ((currA->isPunct() || currA->isSpace()) && (currB->isPunct() || currB->isSpace())) {
00377             if (*currA != *currB) {
00378                 return (*currA < *currB) ? -1 : +1;
00379             }
00380             ++currA;
00381             ++currB;
00382             if (currA->isNull() || currB->isNull()) {
00383                 break;
00384             }
00385         }
00386 
00387         // now some digits follow...
00388         if ((*currA == QLatin1Char('0')) || (*currB == QLatin1Char('0'))) {
00389             // one digit-sequence starts with 0 -> assume we are in a fraction part
00390             // do left aligned comparison (numbers are considered left aligned)
00391             while (1) {
00392                 if (!currA->isDigit() && !currB->isDigit()) {
00393                     break;
00394                 } else if (!currA->isDigit()) {
00395                     return +1;
00396                 } else if (!currB->isDigit()) {
00397                     return -1;
00398                 } else if (*currA < *currB) {
00399                     return -1;
00400                 } else if (*currA > *currB) {
00401                     return + 1;
00402                 }
00403                 ++currA;
00404                 ++currB;
00405             }
00406         } else {
00407             // No digit-sequence starts with 0 -> assume we are looking at some integer
00408             // do right aligned comparison.
00409             //
00410             // The longest run of digits wins. That aside, the greatest
00411             // value wins, but we can't know that it will until we've scanned
00412             // both numbers to know that they have the same magnitude.
00413 
00414             bool isFirstRun = true;
00415             int weight = 0;
00416             while (1) {
00417                 if (!currA->isDigit() && !currB->isDigit()) {
00418                     if (weight != 0) {
00419                         return weight;
00420                     }
00421                     break;
00422                 } else if (!currA->isDigit()) {
00423                     if (isFirstRun) {
00424                         return *currA < *currB ? -1 : +1;
00425                     } else {
00426                         return -1;
00427                     }
00428                 } else if (!currB->isDigit()) {
00429                     if (isFirstRun) {
00430                         return *currA < *currB ? -1 : +1;
00431                     } else {
00432                         return +1;
00433                     }
00434                 } else if ((*currA < *currB) && (weight == 0)) {
00435                     weight = -1;
00436                 } else if ((*currA > *currB) && (weight == 0)) {
00437                     weight = + 1;
00438                 }
00439                 ++currA;
00440                 ++currB;
00441                 isFirstRun = false;
00442             }
00443         }
00444     }
00445 
00446     if (currA->isNull() && currB->isNull()) {
00447         return 0;
00448     }
00449 
00450     return currA->isNull() ? -1 : + 1;
00451 }
00452 
00453 QString KStringHandler::preProcessWrap(const QString &text)
00454 {
00455     const QChar zwsp(0x200b);
00456 
00457     QString result;
00458     result.reserve(text.length());
00459 
00460     for (int i = 0; i < text.length(); i++) {
00461         const QChar c = text[i];
00462         bool openingParens = (c == QLatin1Char('(') || c == QLatin1Char('{') || c == QLatin1Char('['));
00463         bool singleQuote = (c == QLatin1Char('\'') );
00464         bool closingParens = (c == QLatin1Char(')') || c == QLatin1Char('}') || c == QLatin1Char(']'));
00465         bool breakAfter   = (closingParens || c.isPunct() || c.isSymbol());
00466         bool nextIsSpace  = (i == (text.length() - 1) || text[i + 1].isSpace());
00467         bool prevIsSpace  = (i == 0 || text[i - 1].isSpace() || result[result.length() - 1] == zwsp);
00468 
00469         // Provide a breaking opportunity before opening parenthesis
00470         if (openingParens && !prevIsSpace)
00471             result += zwsp;
00472         
00473         // Provide a word joiner before the single quote
00474         if (singleQuote && !prevIsSpace)
00475             result += QChar(0x2060);
00476 
00477         result += c;
00478 
00479         if (breakAfter && !openingParens && !nextIsSpace && !singleQuote) 
00480             result += zwsp;
00481     }
00482 
00483     return result;
00484 }
00485 

KDECore

Skip menu "KDECore"
  • Main Page
  • Modules
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.7.3
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal