Kate
kateregexp.cpp
Go to the documentation of this file.
00001 /* This file is part of the KDE libraries and the Kate part. 00002 * 00003 * Copyright (C) 2009 Bernhard Beschow <bbeschow@cs.tu-berlin.de> 00004 * Copyright (C) 2007 Sebastian Pipping <webmaster@hartwork.org> 00005 * 00006 * This library is free software; you can redistribute it and/or 00007 * modify it under the terms of the GNU Library General Public 00008 * License as published by the Free Software Foundation; either 00009 * version 2 of the License, or (at your option) any later version. 00010 * 00011 * This library is distributed in the hope that it will be useful, 00012 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 00014 * Library General Public License for more details. 00015 * 00016 * You should have received a copy of the GNU Library General Public License 00017 * along with this library; see the file COPYING.LIB. If not, write to 00018 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 00019 * Boston, MA 02110-1301, USA. 00020 */ 00021 00022 #include "kateregexp.h" 00023 00024 KateRegExp::KateRegExp(const QString &pattern, Qt::CaseSensitivity cs, 00025 QRegExp::PatternSyntax syntax) 00026 : m_regExp(pattern, cs, syntax) 00027 { 00028 } 00029 00030 // these things can besides '.' and '\s' make apptern multi-line: 00031 // \n, \x000A, \x????-\x????, \0012, \0???-\0??? 00032 // a multi-line pattern must not pass as single-line, the other 00033 // way around will just result in slower searches and is therefore 00034 // not as critical 00035 int KateRegExp::repairPattern(bool & stillMultiLine) 00036 { 00037 const QString & text = pattern(); // read-only input for parsing 00038 00039 // get input 00040 const int inputLen = text.length(); 00041 int input = 0; // walker index 00042 00043 // prepare output 00044 QString output; 00045 output.reserve(2 * inputLen + 1); // twice should be enough for the average case 00046 00047 // parser state 00048 stillMultiLine = false; 00049 int replaceCount = 0; 00050 bool insideClass = false; 00051 00052 while (input < inputLen) 00053 { 00054 if (insideClass) 00055 { 00056 // wait for closing, unescaped ']' 00057 switch (text[input].unicode()) 00058 { 00059 case L'\\': 00060 switch (text[input + 1].unicode()) 00061 { 00062 case L'x': 00063 if (input + 5 < inputLen) 00064 { 00065 // copy "\x????" unmodified 00066 output.append(text.mid(input, 6)); 00067 input += 6; 00068 } else { 00069 // copy "\x" unmodified 00070 output.append(text.mid(input, 2)); 00071 input += 2; 00072 } 00073 stillMultiLine = true; 00074 break; 00075 00076 case L'0': 00077 if (input + 4 < inputLen) 00078 { 00079 // copy "\0???" unmodified 00080 output.append(text.mid(input, 5)); 00081 input += 5; 00082 } else { 00083 // copy "\0" unmodified 00084 output.append(text.mid(input, 2)); 00085 input += 2; 00086 } 00087 stillMultiLine = true; 00088 break; 00089 00090 case L's': 00091 // replace "\s" with "[ \t]" 00092 output.append("[ \\t]"); 00093 input += 2; 00094 replaceCount++; 00095 break; 00096 00097 case L'n': 00098 stillMultiLine = true; 00099 // FALLTROUGH 00100 00101 default: 00102 // copy "\?" unmodified 00103 output.append(text.mid(input, 2)); 00104 input += 2; 00105 } 00106 break; 00107 00108 case L']': 00109 // copy "]" unmodified 00110 insideClass = false; 00111 output.append(text[input]); 00112 input++; 00113 break; 00114 00115 default: 00116 // copy "?" unmodified 00117 output.append(text[input]); 00118 input++; 00119 00120 } 00121 } 00122 else 00123 { 00124 // search for real dots and \S 00125 switch (text[input].unicode()) 00126 { 00127 case L'\\': 00128 switch (text[input + 1].unicode()) 00129 { 00130 case L'x': 00131 if (input + 5 < inputLen) 00132 { 00133 // copy "\x????" unmodified 00134 output.append(text.mid(input, 6)); 00135 input += 6; 00136 } else { 00137 // copy "\x" unmodified 00138 output.append(text.mid(input, 2)); 00139 input += 2; 00140 } 00141 stillMultiLine = true; 00142 break; 00143 00144 case L'0': 00145 if (input + 4 < inputLen) 00146 { 00147 // copy "\0???" unmodified 00148 output.append(text.mid(input, 5)); 00149 input += 5; 00150 } else { 00151 // copy "\0" unmodified 00152 output.append(text.mid(input, 2)); 00153 input += 2; 00154 } 00155 stillMultiLine = true; 00156 break; 00157 00158 case L's': 00159 // replace "\s" with "[ \t]" 00160 output.append("[ \\t]"); 00161 input += 2; 00162 replaceCount++; 00163 break; 00164 00165 case L'n': 00166 stillMultiLine = true; 00167 // FALLTROUGH 00168 00169 default: 00170 // copy "\?" unmodified 00171 output.append(text.mid(input, 2)); 00172 input += 2; 00173 } 00174 break; 00175 00176 case L'.': 00177 // replace " with "[^\n]" 00178 output.append("[^\\n]"); 00179 input++; 00180 replaceCount++; 00181 break; 00182 00183 case L'[': 00184 // copy "]" unmodified 00185 insideClass = true; 00186 output.append(text[input]); 00187 input++; 00188 break; 00189 00190 default: 00191 // copy "?" unmodified 00192 output.append(text[input]); 00193 input++; 00194 00195 } 00196 } 00197 } 00198 00199 // Overwrite with repaired pattern 00200 m_regExp.setPattern(output); 00201 return replaceCount; 00202 } 00203 00204 00205 00206 bool KateRegExp::isMultiLine() const 00207 { 00208 const QString &text = pattern(); 00209 00210 // parser state 00211 bool insideClass = false; 00212 00213 for (int input = 0; input < text.length(); /*empty*/ ) 00214 { 00215 if (insideClass) 00216 { 00217 // wait for closing, unescaped ']' 00218 switch (text[input].unicode()) 00219 { 00220 case L'\\': 00221 switch (text[input + 1].unicode()) 00222 { 00223 case L'x': 00224 return true; 00225 00226 case L'0': 00227 return true; 00228 00229 case L's': 00230 // replace "\s" with "[ \t]" 00231 input += 2; 00232 break; 00233 00234 case L'n': 00235 return true; 00236 // FALLTROUGH 00237 00238 default: 00239 // copy "\?" unmodified 00240 input += 2; 00241 } 00242 break; 00243 00244 case L']': 00245 // copy "]" unmodified 00246 insideClass = false; 00247 input++; 00248 break; 00249 00250 default: 00251 // copy "?" unmodified 00252 input++; 00253 00254 } 00255 } 00256 else 00257 { 00258 // search for real dots and \S 00259 switch (text[input].unicode()) 00260 { 00261 case L'\\': 00262 switch (text[input + 1].unicode()) 00263 { 00264 case L'x': 00265 return true; 00266 00267 case L'0': 00268 return true; 00269 00270 case L's': 00271 // replace "\s" with "[ \t]" 00272 input += 2; 00273 break; 00274 00275 case L'n': 00276 return true; 00277 00278 default: 00279 // copy "\?" unmodified 00280 input += 2; 00281 } 00282 break; 00283 00284 case L'.': 00285 // replace " with "[^\n]" 00286 input++; 00287 break; 00288 00289 case L'[': 00290 // copy "]" unmodified 00291 insideClass = true; 00292 input++; 00293 break; 00294 00295 default: 00296 // copy "?" unmodified 00297 input++; 00298 00299 } 00300 } 00301 } 00302 00303 return false; 00304 } 00305 00306 00307 00308 int KateRegExp::indexIn(const QString &str, int start, int end) const 00309 { 00310 return m_regExp.indexIn(str.left(end), start, QRegExp::CaretAtZero); 00311 } 00312 00313 00314 00315 int KateRegExp::lastIndexIn(const QString &str, int start, int end) const 00316 { 00317 const int index = m_regExp.lastIndexIn(str.mid(start, end-start), -1, QRegExp::CaretAtZero); 00318 00319 if (index == -1) 00320 return -1; 00321 00322 const int index2 = m_regExp.indexIn(str.left(end), start+index, QRegExp::CaretAtZero); 00323 00324 return index2; 00325 } 00326 00327 // kate: space-indent on; indent-width 2; replace-tabs on;
KDE 4.6 API Reference