• Skip to content
  • Skip to link menu
KDE 4.7 API Reference
  • KDE API Reference
  • kdelibs
  • KDE Home
  • Contact Us
 

KIOSlave

parsinghelpers.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries
00002     Copyright (C) 2008 Andreas Hartmetz <ahartmetz@gmail.com>
00003     Copyright (C) 2010,2011 Rolf Eike Beer <kde@opensource.sf-tec.de>
00004 
00005     This library is free software; you can redistribute it and/or
00006     modify it under the terms of the GNU Library General Public
00007     License as published by the Free Software Foundation; either
00008     version 2 of the License, or (at your option) any later version.
00009 
00010     This library is distributed in the hope that it will be useful,
00011     but WITHOUT ANY WARRANTY; without even the implied warranty of
00012     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013     Library General Public License for more details.
00014 
00015     You should have received a copy of the GNU Library General Public License
00016     along with this library; see the file COPYING.LIB.  If not, write to
00017     the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018     Boston, MA 02110-1301, USA.
00019 */
00020 
00021 #include <QDir>
00022 #include <QMap>
00023 #include <QTextCodec>
00024 #include <QUrl>
00025 
00026 #include <kcodecs.h>
00027 #include <kdebug.h>
00028 
// Moves *pos forward over a run of blanks (space or horizontal tab).
//  input: the character buffer being scanned
//  pos:   in/out index into input; on return it refers to the first non-blank
//         character at or after its initial value, or to end if only blanks follow
//  end:   index one past the last character that may be examined
static void skipSpace(const char input[], int *pos, int end)
{
    int cursor = *pos;
    for (; cursor < end; ++cursor) {
        const char c = input[cursor];
        if (c != ' ' && c != '\t') {
            break;
        }
    }
    *pos = cursor;
}
00039 
// Moves *pos to the first character of the following line, tolerating any mix
// of '\r' and '\n' as line terminator.
//  input: the character buffer being scanned
//  pos:   in/out index into input
//  end:   index one past the last character that may be examined
// Returns true if another header line follows. Returns false if the buffer is
// exhausted or if an empty line (the same terminator character seen twice)
// was consumed, i.e. the end of the header has been reached.
static bool nextLine(const char input[], int *pos, int end)
{
    int cursor = *pos;

    // run to the end of the current line
    while (cursor < end) {
        const char c = input[cursor];
        if (c == '\r' || c == '\n') {
            break;
        }
        ++cursor;
    }

    // consume terminator characters until one kind has been seen twice
    int crSeen = 0;
    int lfSeen = 0;
    while (cursor < end && crSeen < 2 && lfSeen < 2) {
        const char c = input[cursor];
        if (c == '\r') {
            ++crSeen;
        } else if (c == '\n') {
            ++lfSeen;
        } else {
            break;
        }
        ++cursor;
    }

    // Neither counter can exceed two, so "one kind twice, the other once"
    // means we have read "\r\n\r" or "\n\r\n". If the single missing
    // character follows, swallow it too so that conforming "\r\n\r\n"
    // (and also "\n\r\n\r") terminators are consumed completely.
    const bool oneShort = (crSeen == 2 && lfSeen == 1) || (lfSeen == 2 && crSeen == 1);
    if (cursor < end && oneShort) {
        const char missing = (crSeen == 1) ? '\r' : '\n';
        if (input[cursor] == missing) {
            ++cursor;
        }
    }

    *pos = cursor;
    return cursor < end && crSeen < 2 && lfSeen < 2;
}
00066 
00067 QByteArray TokenIterator::next()
00068 {
00069     QPair<int, int> token = m_tokens[m_currentToken++];
00070     //fromRawData brings some speed advantage but also the requirement to keep the text buffer
00071     //around. this together with implicit sharing (you don't know where copies end up)
00072     //is dangerous!
00073     //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
00074     return QByteArray(&m_buffer[token.first], token.second - token.first);
00075 }
00076 
00077 QByteArray TokenIterator::current() const
00078 {
00079     QPair<int, int> token = m_tokens[m_currentToken - 1];
00080     //return QByteArray::fromRawData(&m_buffer[token.first], token.second - token.first);
00081     return QByteArray(&m_buffer[token.first], token.second - token.first);
00082 }
00083 
00084 QList<QByteArray> TokenIterator::all() const
00085 {
00086     QList<QByteArray> ret;
00087     for (int i = 0; i < m_tokens.count(); i++) {
00088         QPair<int, int> token = m_tokens[i];
00089         ret.append(QByteArray(&m_buffer[token.first], token.second - token.first));
00090     }
00091     return ret;
00092 }
00093 
00094 
00095 HeaderTokenizer::HeaderTokenizer(char *buffer)
00096     : m_buffer(buffer)
00097 {
00098     // add information about available headers and whether they have one or multiple,
00099     // comma-separated values.
00100 
00101     //The following response header fields are from RFC 2616 unless otherwise specified.
00102     //Hint: search the web for e.g. 'http "accept-ranges header"' to find information about
00103     //a header field.
00104     static const HeaderFieldTemplate headerFieldTemplates[] = {
00105         {"accept-ranges", false},
00106         {"age", false},
00107         {"cache-control", true},
00108         {"connection", true},
00109         {"content-disposition", false}, //is multi-valued in a way, but with ";" separator!
00110         {"content-encoding", true},
00111         {"content-language", true},
00112         {"content-length", false},
00113         {"content-location", false},
00114         {"content-md5", false},
00115         {"content-type", false},
00116         {"date", false},
00117         {"dav", true}, //RFC 2518
00118         {"etag", false},
00119         {"expires", false},
00120         {"keep-alive", false}, //RFC 2068
00121         {"last-modified", false},
00122         {"link", false}, //RFC 2068, multi-valued with ";" separator
00123         {"location", false},
00124         {"p3p", true}, // http://www.w3.org/TR/P3P/
00125         {"pragma", true},
00126         {"proxy-authenticate", false}, //complicated multi-valuedness: quoted commas don't separate
00127                                        //multiple values. we handle this at a higher level.
00128         {"proxy-connection", true}, //inofficial but well-known; to avoid misunderstandings
00129                                     //when using "connection" when talking to a proxy.
00130         {"refresh", false}, //not sure, only found some mailing list posts mentioning it
00131         {"set-cookie", false}, //RFC 2109; the multi-valuedness seems to be usually achieved
00132                                //by sending several instances of this field as opposed to
00133                                //usually comma-separated lists with maybe multiple instances.
00134         {"transfer-encoding", true},
00135         {"upgrade", true},
00136         {"warning", true},
00137         {"www-authenticate", false} //see proxy-authenticate
00138     };
00139 
00140     for (uint i = 0; i < sizeof(headerFieldTemplates) / sizeof(HeaderFieldTemplate); i++) {
00141         const HeaderFieldTemplate &ft = headerFieldTemplates[i];
00142         insert(QByteArray(ft.name), HeaderField(ft.isMultiValued));
00143     }
00144 }
00145 
00146 int HeaderTokenizer::tokenize(int begin, int end)
00147 {
00148     char *buf = m_buffer;  //keep line length in check :/
00149     int idx = begin;
00150     int startIdx = begin; //multi-purpose start of current token
00151     bool multiValuedEndedWithComma = false; //did the last multi-valued line end with a comma?
00152     QByteArray headerKey;
00153     do {
00154 
00155         if (buf[idx] == ' ' || buf [idx] == '\t') {
00156             // line continuation; preserve startIdx except (see below)
00157             if (headerKey.isEmpty()) {
00158                 continue;
00159             }
00160             // turn CR/LF into spaces for later parsing convenience
00161             int backIdx = idx - 1;
00162             while (backIdx >= begin && (buf[backIdx] == '\r' || buf[backIdx] == '\n')) {
00163                 buf[backIdx--] = ' ';
00164             }
00165 
00166             // multiple values, comma-separated: add new value or continue previous?
00167             if (operator[](headerKey).isMultiValued) {
00168                 if (multiValuedEndedWithComma) {
00169                     // start new value; this is almost like no line continuation
00170                     skipSpace(buf, &idx, end);
00171                     startIdx = idx;
00172                 } else {
00173                     // continue previous value; this is tricky. unit tests to the rescue!
00174                     if (operator[](headerKey).beginEnd.last().first == startIdx) {
00175                         // remove entry, it will be re-added because already idx != startIdx
00176                         operator[](headerKey).beginEnd.removeLast();
00177                     } else {
00178                         // no comma, no entry: the prev line was whitespace only - start new value
00179                         skipSpace(buf, &idx, end);
00180                         startIdx = idx;
00181                     }
00182                 }
00183             }
00184 
00185         } else {
00186             // new field
00187             startIdx = idx;
00188             // also make sure that there is at least one char after the colon
00189             while (idx < (end - 1) && buf[idx] != ':' && buf[idx] != '\r' && buf[idx] != '\n') {
00190                 buf[idx] = tolower(buf[idx]);
00191                 idx++;
00192             }
00193             if (buf[idx] != ':') {
00194                 //malformed line: no colon
00195                 headerKey.clear();
00196                 continue;
00197             }
00198             headerKey = QByteArray(&buf[startIdx], idx - startIdx);
00199             if (!contains(headerKey)) {
00200                 //we don't recognize this header line
00201                 headerKey.clear();
00202                 continue;
00203             }
00204             // skip colon & leading whitespace
00205             idx++;
00206             skipSpace(buf, &idx, end);
00207             startIdx = idx;
00208         }
00209 
00210         // we have the name/key of the field, now parse the value
00211         if (!operator[](headerKey).isMultiValued) {
00212 
00213             // scan to end of line
00214             while (idx < end && buf[idx] != '\r' && buf[idx] != '\n') {
00215                 idx++;
00216             }
00217             if (!operator[](headerKey).beginEnd.isEmpty()) {
00218                 // there already is an entry; are we just in a line continuation?
00219                 if (operator[](headerKey).beginEnd.last().first == startIdx) {
00220                     // line continuation: delete previous entry and later insert a new, longer one.
00221                     operator[](headerKey).beginEnd.removeLast();
00222                 }
00223             }
00224             operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
00225 
00226         } else {
00227 
00228             // comma-separated list
00229             while (true) {
00230                 //skip one value
00231                 while (idx < end && buf[idx] != '\r' && buf[idx] != '\n' && buf[idx] != ',') {
00232                     idx++;
00233                 }
00234                 if (idx != startIdx) {
00235                     operator[](headerKey).beginEnd.append(QPair<int, int>(startIdx, idx));
00236                 }
00237                 multiValuedEndedWithComma = buf[idx] == ',';
00238                 //skip comma(s) and leading whitespace, if any respectively
00239                 while (idx < end && buf[idx] == ',') {
00240                     idx++;
00241                 }
00242                 skipSpace(buf, &idx, end);
00243                 //next value or end-of-line / end of header?
00244                 if (buf[idx] >= end || buf[idx] == '\r' || buf[idx] == '\n') {
00245                     break;
00246                 }
00247                 //next value
00248                 startIdx = idx;
00249             }
00250         }
00251     } while (nextLine(buf, &idx, end));
00252     return idx;
00253 }
00254 
00255 
00256 TokenIterator HeaderTokenizer::iterator(const char *key) const
00257 {
00258     QByteArray keyBa = QByteArray::fromRawData(key, strlen(key));
00259     if (contains(keyBa)) {
00260         return TokenIterator(value(keyBa).beginEnd, m_buffer);
00261     } else {
00262         return TokenIterator(m_nullTokens, m_buffer);
00263     }
00264 }
00265 
00266 static void skipLWS(const QString &str, int &pos)
00267 {
00268     while (pos < str.length() && (str[pos] == QLatin1Char(' ') || str[pos] == QLatin1Char('\t'))) {
00269         ++pos;
00270     }
00271 }
00272 
// Characters that are not allowed in an unquoted disposition type, parameter
// name or parameter value, respectively. Each set is a superset of the next
// one; the sets deliberately share a common ending so that the compiler is
// able to join them.
static const char typeSpecials[]  = "{}*" "'%" "()<>@,;:\\\"/[]?=";
static const char attrSpecials[]  =       "'%" "()<>@,;:\\\"/[]?=";
static const char valueSpecials[] =            "()<>@,;:\\\"/[]?=";
00277 
00278 static bool specialChar(const QChar &ch, const char *specials)
00279 {
00280     // WORKAROUND: According to RFC 2616, any character other than ascii
00281     // characters should NOT be allowed in unquoted content-disposition file
00282     // names. However, since none of the major browsers follow this rule, we do
00283     // the same thing here and allow all printable unicode characters. See
00284     // https://bugs.kde.org/show_bug.cgi?id=261223 for the detials.
00285     if (!ch.isPrint()) {
00286         return true;
00287     }
00288 
00289     for (int i = qstrlen(specials) - 1; i >= 0; i--) {
00290         if (ch == QLatin1Char(specials[i])) {
00291             return true;
00292         }
00293     }
00294 
00295     return false;
00296 }
00297 
00313 static QString extractUntil(const QString &str, QChar term, int &pos, const char *specials)
00314 {
00315     QString out;
00316     skipLWS(str, pos);
00317     bool valid = true;
00318 
00319     while (pos < str.length() && (str[pos] != term)) {
00320         out += str[pos];
00321         valid = (valid && !specialChar(str[pos], specials));
00322         ++pos;
00323     }
00324 
00325     if (pos < str.length()) { // Stopped due to finding term
00326         ++pos;
00327     }
00328 
00329     if (!valid) {
00330         return QString();
00331     }
00332 
00333     // Remove trailing linear whitespace...
00334     while (out.endsWith(QLatin1Char(' ')) || out.endsWith(QLatin1Char('\t'))) {
00335         out.chop(1);
00336     }
00337 
00338     if (out.contains(QLatin1Char(' '))) {
00339         out.clear();
00340     }
00341 
00342     return out;
00343 }
00344 
00345 // As above, but also handles quotes..
00346 // pos is set to -1 on parse error
00347 static QString extractMaybeQuotedUntil(const QString &str, int &pos)
00348 {
00349     const QChar term = QLatin1Char(';');
00350 
00351     skipLWS(str, pos);
00352 
00353     // Are we quoted?
00354     if (pos < str.length() && str[pos] == QLatin1Char('"')) {
00355         QString out;
00356 
00357         // Skip the quote...
00358         ++pos;
00359 
00360         // when quoted we also need an end-quote
00361         bool endquote = false;
00362 
00363         // Parse until trailing quote...
00364         while (pos < str.length()) {
00365             if (str[pos] == QLatin1Char('\\') && pos + 1 < str.length()) {
00366                 // quoted-pair = "\" CHAR
00367                 out += str[pos + 1];
00368                 pos += 2; // Skip both...
00369             } else if (str[pos] == QLatin1Char('"')) {
00370                 ++pos;
00371                 endquote = true;
00372                 break;
00373             } else if (!str[pos].isPrint()) { // Don't allow CTL's RFC 2616 sec 2.2
00374                 break;
00375             } else {
00376                 out += str[pos];
00377                 ++pos;
00378             }
00379         }
00380 
00381         if (!endquote) {
00382             pos = -1;
00383             return QString();
00384         }
00385 
00386         // Skip until term..
00387         while (pos < str.length() && (str[pos] != term)) {
00388             if ((str[pos] != QLatin1Char(' ')) && (str[pos] != QLatin1Char('\t'))) {
00389                 pos = -1;
00390                 return QString();
00391             }
00392             ++pos;
00393         }
00394 
00395         if (pos < str.length()) {  // Stopped due to finding term
00396             ++pos;
00397         }
00398 
00399         return out;
00400     } else {
00401         return extractUntil(str, term, pos, valueSpecials);
00402     }
00403 }
00404 
00405 static QMap<QString, QString> contentDispositionParserInternal(const QString &disposition)
00406 {
00407     kDebug(7113) << "disposition: " << disposition;
00408     int pos = 0;
00409     const QString strDisposition = extractUntil(disposition, QLatin1Char(';'), pos, typeSpecials).toLower();
00410 
00411     QMap<QString, QString> parameters;
00412     QMap<QString, QString> contparams;   // all parameters that contain continuations
00413     QMap<QString, QString> encparams;    // all parameters that have character encoding
00414 
00415     // the type is invalid, the complete header is junk
00416     if (strDisposition.isEmpty()) {
00417         return parameters;
00418     }
00419 
00420     parameters.insert(QLatin1String("type"), strDisposition);
00421 
00422     while (pos < disposition.length()) {
00423         QString key = extractUntil(disposition, QLatin1Char('='), pos, attrSpecials).toLower();
00424 
00425         if (key.isEmpty()) {
00426             // parse error in this key: do not parse more, but add up
00427             // everything we already got
00428             kDebug(7113) << "parse error in key, abort parsing";
00429             break;
00430         }
00431 
00432         QString val;
00433         if (key.endsWith(QLatin1Char('*'))) {
00434             val = extractUntil(disposition, QLatin1Char(';'), pos, valueSpecials);
00435         } else {
00436             val = extractMaybeQuotedUntil(disposition, pos);
00437         }
00438 
00439         if (val.isEmpty()) {
00440             if (pos == -1) {
00441                 kDebug(7113) << "parse error in value, abort parsing";
00442                 break;
00443             }
00444             continue;
00445         }
00446 
00447         const int spos = key.indexOf(QLatin1Char('*'));
00448         if (spos == key.length() - 1) {
00449             key.chop(1);
00450             encparams.insert(key, val);
00451         } else if (spos >= 0) {
00452             contparams.insert(key, val);
00453         } else if (parameters.contains(key)) {
00454             kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
00455             parameters.remove(key);
00456             return parameters;
00457         } else {
00458             parameters.insert(key, val);
00459         }
00460     }
00461 
00462     QMap<QString, QString>::iterator i = contparams.begin();
00463     while (i != contparams.end()) {
00464         QString key = i.key();
00465         int spos = key.indexOf(QLatin1Char('*'));
00466         bool hasencoding = false;
00467 
00468         if (key.at(spos + 1) != QLatin1Char('0')) {
00469             ++i;
00470             continue;
00471         }
00472 
00473         // no leading zeros allowed, so delete the junk
00474         int klen = key.length();
00475         if (klen > spos + 2) {
00476             // nothing but continuations and encodings may insert * into parameter name
00477             if ((klen > spos + 3) || ((klen == spos + 3) && (key.at(spos + 2) != QLatin1Char('*')))) {
00478                 kDebug(7113) << "removing invalid key " << key << "with val" << i.value() << key.at(spos + 2);
00479                 i = contparams.erase(i);
00480                 continue;
00481             }
00482             hasencoding = true;
00483         }
00484 
00485         int seqnum = 1;
00486         QMap<QString, QString>::iterator partsi;
00487         // we do not need to care about encoding specifications: only the first
00488         // part is allowed to have one
00489         QString val = i.value();
00490 
00491         key.chop(hasencoding ? 2 : 1);
00492 
00493         while ((partsi = contparams.find(key + QString::number(seqnum))) != contparams.end()) {
00494             val += partsi.value();
00495             contparams.erase(partsi);
00496         }
00497 
00498         i = contparams.erase(i);
00499 
00500         key.chop(1);
00501         if (hasencoding) {
00502             encparams.insert(key, val);
00503         } else {
00504             if (parameters.contains(key)) {
00505                 kDebug(7113) << "duplicate key" << key << "found, ignoring everything more";
00506                 parameters.remove(key);
00507                 return parameters;
00508             }
00509 
00510             parameters.insert(key, val);
00511         }
00512     }
00513 
00514     for (QMap<QString, QString>::iterator i = encparams.begin(); i != encparams.end(); ++i) {
00515         QString val = i.value();
00516 
00517         // RfC 2231 encoded character set in filename
00518         int spos = val.indexOf(QLatin1Char('\''));
00519         if (spos == -1) {
00520             continue;
00521         }
00522         int npos = val.indexOf(QLatin1Char('\''), spos + 1);
00523         if (npos == -1) {
00524             continue;
00525         }
00526 
00527         const QString charset = val.left(spos);
00528         const QString lang = val.mid(spos + 1, npos - spos - 1);
00529         const QByteArray rawval = QByteArray::fromPercentEncoding(val.mid(npos + 1).toAscii());
00530 
00531         if (charset.isEmpty() || (charset == QLatin1String("us-ascii"))) {
00532             bool valid = true;
00533             for (int j = rawval.length() - 1; (j >= 0) && valid; j--) {
00534                 valid = (rawval.at(j) >= 32);
00535             }
00536 
00537             if (!valid)
00538                 continue;
00539             val = QString::fromAscii(rawval.constData());
00540         } else {
00541             QTextCodec *codec = QTextCodec::codecForName(charset.toAscii());
00542             if (!codec)
00543                 continue;
00544             val = codec->toUnicode(rawval);
00545         }
00546 
00547         parameters.insert(i.key(), val);
00548     }
00549 
00550     return parameters;
00551 }
00552 
00553 static QMap<QString, QString> contentDispositionParser(const QString &disposition)
00554 {
00555     QMap<QString, QString> parameters = contentDispositionParserInternal(disposition);
00556 
00557     const QLatin1String fn("filename");
00558     if (parameters.contains(fn)) {
00559         // Content-Disposition is not allowed to dictate directory
00560         // path, thus we extract the filename only.
00561         const QString val = QDir::toNativeSeparators(parameters[fn]);
00562         int slpos = val.lastIndexOf(QDir::separator());
00563 
00564         if (slpos > -1) {
00565             parameters.insert(fn, val.mid(slpos + 1));
00566         }
00567     }
00568 
00569     return parameters;
00570 }

KIOSlave

Skip menu "KIOSlave"
  • Main Page
  • Alphabetical List
  • Class List
  • Class Hierarchy
  • Class Members
  • File List
  • File Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.7.5
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal