KDECore
kmimemagicrule.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries 00002 * Copyright 2007 David Faure <faure@kde.org> 00003 * 00004 * This library is free software; you can redistribute it and/or 00005 * modify it under the terms of the GNU Library General Public 00006 * License as published by the Free Software Foundation; either 00007 * version 2 of the License, or (at your option) any later version. 00008 * 00009 * This library is distributed in the hope that it will be useful, 00010 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00011 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00012 * Library General Public License for more details. 00013 * 00014 * You should have received a copy of the GNU Library General Public License 00015 * along with this library; see the file COPYING.LIB. If not, write to 00016 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00017 * Boston, MA 02110-1301, USA. 00018 */ 00019 00020 #include "kmimemagicrule_p.h" 00021 #include <QIODevice> 00022 #include <kdebug.h> 00023 #include <QByteArrayMatcher> 00024 00025 /* 00026 * Historical note: 00027 * The notion of indents is used differently from the old file(1) magic file. 00028 * It is not enough that a top-level rule matches for the search to be over; 00029 * in file(1) subrules were used as refinement (and in KMimeMagic they were 00030 * mandatory if the toplevel rule didn't have a mimetype associated with it). 00031 * Here they are mandatory. 00032 * We need at least one continuation at every level to match, and then the match is valid: 00033 [50:application/x-kformula] 00034 >0=^B^_<8B> 00035 1>10=^GKOffice 00036 2>18=^Xapplication/x-kformula^D^F 00037 >0=^DPK^C^D 00038 1>30=^Hmimetype 00039 2>38=^Vapplication/x-kformula 00040 * Either it's an old (tar) file and the first hierarchy (0,1,2 levels) matches, 00041 * or it's a newer file (zip) file and the second hierarchy (0,1,2 levels) has to match. 00042 * 00043 */ 00044 00045 static bool testMatches(QIODevice* device, qint64 deviceSize, QByteArray& availableData, const QList<KMimeMagicMatch>& matches, const QString& mimeType) 00046 { 00047 for ( QList<KMimeMagicMatch>::const_iterator it = matches.begin(), end = matches.end() ; 00048 it != end ; ++it ) { 00049 const KMimeMagicMatch& match = *it; 00050 if (match.match(device, deviceSize, availableData, mimeType)) { 00051 // One of the hierarchies matched -> mimetype recognized. 00052 return true; 00053 } 00054 } 00055 return false; 00056 } 00057 00058 // Taken from QByteArray::indexOf, but that one uses strncmp so it stops on '\0', 00059 // replaced with memcmp here... 00060 static int indexOf(const QByteArray& that, const QByteArray &ba) 00061 { 00062 const int l = that.size(); 00063 const int ol = ba.size(); 00064 if (ol > l) 00065 return -1; 00066 if (ol == 0) 00067 return 0; 00068 if (ol == 1) 00069 return that.indexOf(*ba.constData()); 00070 00071 if (l > 500 && ol > 5) 00072 return QByteArrayMatcher(ba).indexIn(that); 00073 00074 const char *needle = ba.data(); 00075 const char *haystack = that.data(); 00076 const char *end = that.data() + (l - ol); 00077 const uint ol_minus_1 = ol - 1; 00078 uint hashNeedle = 0, hashHaystack = 0; 00079 int idx; 00080 for (idx = 0; idx < ol; ++idx) { 00081 hashNeedle = ((hashNeedle<<1) + needle[idx]); 00082 hashHaystack = ((hashHaystack<<1) + haystack[idx]); 00083 } 00084 hashHaystack -= *(haystack + ol_minus_1); 00085 00086 while (haystack <= end) { 00087 hashHaystack += *(haystack + ol_minus_1); 00088 if (hashHaystack == hashNeedle && *needle == *haystack 00089 && memcmp(needle, haystack, ol) == 0) 00090 return haystack - that.data(); 00091 00092 if (ol_minus_1 < sizeof(uint) * 8 /*CHAR_BIT*/) 00093 hashHaystack -= (*haystack) << ol_minus_1; 00094 hashHaystack <<= 1; 00095 00096 ++haystack; 00097 } 00098 return -1; 00099 } 00100 00101 00102 bool KMimeMagicRule::match(QIODevice* device, qint64 deviceSize, QByteArray& availableData) const 00103 { 00104 return testMatches(device, deviceSize, availableData, m_matches, m_mimetype); 00105 } 00106 00107 bool KMimeMagicMatch::match(QIODevice* device, qint64 deviceSize, QByteArray& availableData, const QString& mimeType) const 00108 { 00109 // First, check that "this" matches, then we'll dive into subMatches if any. 00110 00111 const qint64 mDataSize = m_data.size(); 00112 if (m_rangeStart + mDataSize > deviceSize) 00113 return false; // file is too small 00114 00115 // Read in one block all the data we'll need 00116 // Example: m_data="ABC", m_rangeLength=3 -> we need 3+3-1=5 bytes (ABCxx,xABCx,xxABC would match) 00117 const int dataNeeded = qMin(mDataSize + m_rangeLength - 1, deviceSize - m_rangeStart); 00118 QByteArray readData; 00119 00120 /*kDebug() << "need " << dataNeeded << " bytes of data starting at " << m_rangeStart 00121 << " - availableData has " << availableData.size() << " bytes," 00122 << " device has " << deviceSize << " bytes." << endl;*/ 00123 00124 if (m_rangeStart + dataNeeded > availableData.size() && availableData.size() < deviceSize) { 00125 // Need to read from device 00126 if (!device->seek(m_rangeStart)) 00127 return false; 00128 readData.resize(dataNeeded); 00129 const int nread = device->read(readData.data(), dataNeeded); 00130 //kDebug() << "readData (from device): reading" << dataNeeded << "bytes."; 00131 if (nread < mDataSize) 00132 return false; // error (or not enough data but we checked for that already) 00133 if (m_rangeStart == 0 && readData.size() > availableData.size()) { 00134 availableData = readData; // update cache 00135 } 00136 if (nread < readData.size()) { 00137 // File big enough to contain m_data, but not big enough for the full rangeLength. 00138 // Pad with zeros. 00139 memset(readData.data() + nread, 0, dataNeeded - nread); 00140 } 00141 //kDebug() << "readData (from device) at pos " << m_rangeStart << ":" << readData; 00142 } else { 00143 readData = QByteArray::fromRawData(availableData.constData() + m_rangeStart, 00144 dataNeeded); 00145 // Warning, readData isn't null-terminated so this kDebug 00146 // gives valgrind warnings (when printing as char* data). 00147 //kDebug() << "readData (from availableData) at pos " << m_rangeStart << ":" << readData; 00148 } 00149 00150 // All we need to do now, is to look for m_data in readData (whose size is dataNeeded). 00151 // Either as a simple indexOf search, or applying the mask. 00152 00153 bool found = false; 00154 if (m_mask.isEmpty()) { 00155 //kDebug() << "m_data=" << m_data; 00156 found = ::indexOf(readData, m_data) != -1; 00157 //if (found) 00158 // kDebug() << "Matched readData=" << readData << "with m_data=" << m_data << "so this is" << mimeType; 00159 } else { 00160 const char* mask = m_mask.constData(); 00161 const char* refData = m_data.constData(); 00162 const char* readDataBase = readData.constData(); 00163 // Example (continued from above): 00164 // deviceSize is 4, so dataNeeded was max'ed to 4. 00165 // maxStartPos = 4 - 3 + 1 = 2, and indeed 00166 // we need to check for a match a positions 0 and 1 (ABCx and xABC). 00167 const qint64 maxStartPos = dataNeeded - mDataSize + 1; 00168 for (int i = 0; i < maxStartPos; ++i) { 00169 const char* d = readDataBase + i; 00170 bool valid = true; 00171 for (int off = 0; off < mDataSize; ++off ) { 00172 if ( ((*d++) & mask[off]) != ((refData[off] & mask[off])) ) { 00173 valid = false; 00174 break; 00175 } 00176 } 00177 if (valid) 00178 found = true; 00179 } 00180 } 00181 if (!found) 00182 return false; 00183 00184 // No submatch? Then we are done. 00185 if (m_subMatches.isEmpty()) 00186 return true; 00187 00188 // Check that one of the submatches matches too 00189 return testMatches(device, deviceSize, availableData, m_subMatches, mimeType); 00190 }
KDE 4.6 API Reference