• Skip to content
  • Skip to link menu
KDE 4.6 API Reference
  • KDE API Reference
  • kdelibs
  • KDE Home
  • Contact Us
 

Kate

katetextloader.h

Go to the documentation of this file.
00001 /*  This file is part of the Kate project.
00002  *
00003  *  Copyright (C) 2010 Christoph Cullmann <cullmann@kde.org>
00004  *
00005  *  This library is free software; you can redistribute it and/or
00006  *  modify it under the terms of the GNU Library General Public
00007  *  License as published by the Free Software Foundation; either
00008  *  version 2 of the License, or (at your option) any later version.
00009  *
00010  *  This library is distributed in the hope that it will be useful,
00011  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
00012  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00013  *  Library General Public License for more details.
00014  *
00015  *  You should have received a copy of the GNU Library General Public License
00016  *  along with this library; see the file COPYING.LIB.  If not, write to
00017  *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
00018  *  Boston, MA 02110-1301, USA.
00019  */
00020 
00021 #ifndef KATE_TEXTLOADER_H
00022 #define KATE_TEXTLOADER_H
00023 
00024 #include <QtCore/QString>
00025 #include <QtCore/QFile>
00026 
00027 // on the fly compression
00028 #include <kfilterdev.h>
00029 #include <kmimetype.h>
00030 
00031 namespace Kate {
00032 
// Size in bytes (256 KiB) of the raw byte buffer that TextLoader fills with each
// read from its device; i.e. the file is loaded in chunks of this size.
00038 static const qint64 KATE_FILE_LOADER_BS  = 256 * 1024;
00039 
00043 class TextLoader
00044 {
00045   public:
00051     TextLoader (const QString &filename, KEncodingProber::ProberType proberType)
00052       : m_codec (0)
00053       , m_eof (false) // default to not eof
00054       , m_lastWasEndOfLine (true) // at start of file, we had a virtual newline
00055       , m_lastWasR (false) // we have not found a \r as last char
00056       , m_position (0)
00057       , m_lastLineStart (0)
00058       , m_eol (TextBuffer::eolUnknown) // no eol type detected atm
00059       , m_buffer (KATE_FILE_LOADER_BS, 0)
00060       , m_converterState (0)
00061       , m_bomFound (false)
00062       , m_firstRead (true)
00063       , m_proberType (proberType)
00064     {
00065       // try to get mimetype for on the fly decompression, don't rely on filename!
00066       QFile testMime (filename);
00067       if (testMime.open (QIODevice::ReadOnly))
00068         m_mimeType = KMimeType::findByContent (&testMime)->name ();
00069       else
00070         m_mimeType = KMimeType::findByPath (filename, 0, false)->name ();
00071 
00072       // construct filter device
00073       m_file = KFilterDev::deviceForFile (filename, m_mimeType, false);
00074     }
00075 
00079     ~TextLoader ()
00080     {
00081       delete m_file;
00082       delete m_converterState;
00083     }
00084 
00090     bool open (QTextCodec *codec)
00091     {
00092       m_codec = codec;
00093       m_eof = false;
00094       m_lastWasEndOfLine = true;
00095       m_lastWasR = false;
00096       m_position = 0;
00097       m_lastLineStart = 0;
00098       m_eol = TextBuffer::eolUnknown;
00099       m_text.clear ();
00100       delete m_converterState;
00101       m_converterState = new QTextCodec::ConverterState (QTextCodec::ConvertInvalidToNull);
00102       m_bomFound = false;
00103       m_firstRead = true;
00104 
00105       // if already opened, close the file...
00106       if (m_file->isOpen())
00107         m_file->close ();
00108 
00109       return m_file->open (QIODevice::ReadOnly);
00110     }
00111 
00116     bool eof () const { return m_eof && !m_lastWasEndOfLine && (m_lastLineStart == m_text.length()); }
00117 
00123     TextBuffer::EndOfLineMode eol () const { return m_eol; }
00124 
00129     bool byteOrderMarkFound () const { return m_bomFound; }
00130 
00135     const QString &mimeTypeForFilterDev () const { return m_mimeType; }
00136 
00141     const QChar *unicode () const { return m_text.unicode(); }
00142 
00147     QTextCodec *textCodec () const { return m_codec; }
00148 
00155     bool readLine (int &offset, int &length)
00156     {
00157       length = 0;
00158       offset = 0;
00159       bool encodingError = false;
00160 
00161       static const QLatin1Char cr(QLatin1Char('\r'));
00162       static const QLatin1Char lf(QLatin1Char('\n'));
00163 
00164       while (m_position <= m_text.length())
00165       {
00166         if (m_position == m_text.length())
00167         {
00168           // try to load more text if something is around
00169           if (!m_eof)
00170           {
00171             int c = m_file->read (m_buffer.data(), m_buffer.size());
00172 
00173             // kill the old lines...
00174             m_text.remove (0, m_lastLineStart);
00175 
00176             // if any text is there, append it....
00177             if (c > 0)
00178             {
00179               // detect byte order marks & codec for byte order markers on first read
00180               int bomBytes = 0;
00181               if (m_firstRead) {
00182                 // use first 16 bytes max to allow BOM detection of codec
00183                 QByteArray bom (m_buffer.data(), qMin (16, c));
00184                 QTextCodec *codecForByteOrderMark = QTextCodec::codecForUtfText (bom, 0);
00185 
00186                 // if codec != null, we found a BOM!
00187                 if (codecForByteOrderMark) {
00188                   m_bomFound = true;
00189 
00190                   // eat away the different boms!
00191                   int mib = codecForByteOrderMark->mibEnum ();
00192                   if (mib == 106) // utf8
00193                     bomBytes = 3;
00194                   if (mib == 1013 || mib == 1014) // utf16
00195                     bomBytes = 2;
00196                   if (mib == 1018 || mib == 1019) // utf32
00197                     bomBytes = 4;
00198                 }
00199 
00203                 if (!m_codec) {
00207                   if (codecForByteOrderMark)
00208                     m_codec = codecForByteOrderMark;
00209                   else {
00213                     KEncodingProber prober (m_proberType);
00214                     prober.feed (m_buffer.constData(), c);
00215 
00216                     // we found codec with some confidence?
00217                     if (prober.confidence() > 0.5)
00218                       m_codec = QTextCodec::codecForName(prober.encoding());
00219 
00220                     // no codec, no chance, encoding error
00221                     if (!m_codec)
00222                       return false;
00223                   }
00224                 }
00225 
00226                 m_firstRead = false;
00227               }
00228 
00229               Q_ASSERT (m_codec);
00230               QString unicode = m_codec->toUnicode (m_buffer.constData() + bomBytes, c - bomBytes, m_converterState);
00231 
00232               // detect broken encoding
00233               for (int i = 0; i < unicode.size(); ++i) {
00234                   if (unicode[i] == 0) {
00235                     encodingError = true;
00236                     break;
00237                   }
00238               }
00239 
00240               m_text.append (unicode);
00241             }
00242 
00243             // is file completely read ?
00244             m_eof = (c == -1) || (c == 0);
00245 
00246             // recalc current pos and last pos
00247             m_position -= m_lastLineStart;
00248             m_lastLineStart = 0;
00249           }
00250 
00251           // oh oh, end of file, escape !
00252           if (m_eof && (m_position == m_text.length()))
00253           {
00254             m_lastWasEndOfLine = false;
00255 
00256             // line data
00257             offset = m_lastLineStart;
00258             length = m_position-m_lastLineStart;
00259 
00260             m_lastLineStart = m_position;
00261 
00262             return !encodingError;
00263           }
00264         }
00265 
00266         if (m_text.at(m_position) == lf)
00267         {
00268           m_lastWasEndOfLine = true;
00269 
00270           if (m_lastWasR)
00271           {
00272             m_lastLineStart++;
00273             m_lastWasR = false;
00274             m_eol = TextBuffer::eolDos;
00275           }
00276           else
00277           {
00278             // line data
00279             offset = m_lastLineStart;
00280             length = m_position-m_lastLineStart;
00281 
00282             m_lastLineStart = m_position+1;
00283             m_position++;
00284 
00285             // only win, if not dos!
00286             if (m_eol != TextBuffer::eolDos)
00287               m_eol = TextBuffer::eolUnix;
00288 
00289             return !encodingError;
00290           }
00291         }
00292         else if (m_text.at(m_position) == cr)
00293         {
00294           m_lastWasEndOfLine = true;
00295           m_lastWasR = true;
00296 
00297           // line data
00298           offset = m_lastLineStart;
00299           length = m_position-m_lastLineStart;
00300 
00301           m_lastLineStart = m_position+1;
00302           m_position++;
00303 
00304           // should only win of first time!
00305           if (m_eol == TextBuffer::eolUnknown)
00306             m_eol = TextBuffer::eolMac;
00307 
00308           return !encodingError;
00309         }
00310         else if (m_text.at(m_position) == QChar::LineSeparator)
00311         {
00312           m_lastWasEndOfLine = true;
00313 
00314           // line data
00315           offset = m_lastLineStart;
00316           length = m_position-m_lastLineStart;
00317 
00318           m_lastLineStart = m_position+1;
00319           m_position++;
00320 
00321           return !encodingError;
00322         }
00323         else
00324         {
00325           m_lastWasEndOfLine = false;
00326           m_lastWasR = false;
00327         }
00328 
00329         m_position++;
00330       }
00331 
00332       return !encodingError;
00333     }
00334 
00335   private:
00336     QTextCodec *m_codec;
00337     bool m_eof;
00338     bool m_lastWasEndOfLine;
00339     bool m_lastWasR;
00340     int m_position;
00341     int m_lastLineStart;
00342     TextBuffer::EndOfLineMode m_eol;
00343     QString m_mimeType;
00344     QIODevice *m_file;
00345     QByteArray m_buffer;
00346     QString m_text;
00347     QTextCodec::ConverterState *m_converterState;
00348     bool m_bomFound;
00349     bool m_firstRead;
00350     KEncodingProber::ProberType m_proberType;
00351 };
00352 
00353 }
00354 
00355 #endif

Kate

Skip menu "Kate"
  • Main Page
  • Namespace List
  • Class Hierarchy
  • Alphabetical List
  • Class List
  • File List
  • Namespace Members
  • Class Members
  • Related Pages

kdelibs

Skip menu "kdelibs"
  • DNSSD
  • Interfaces
  •   KHexEdit
  •   KMediaPlayer
  •   KSpeech
  •   KTextEditor
  • Kate
  • kconf_update
  • KDE3Support
  •   KUnitTest
  • KDECore
  • KDED
  • KDEsu
  • KDEUI
  • KDEWebKit
  • KDocTools
  • KFile
  • KHTML
  • KImgIO
  • KInit
  • kio
  • KIOSlave
  • KJS
  •   KJS-API
  •   WTF
  • kjsembed
  • KNewStuff
  • KParts
  • KPty
  • Kross
  • KUnitConversion
  • KUtils
  • Nepomuk
  • Plasma
  • Solid
  • Sonnet
  • ThreadWeaver
Generated for kdelibs by doxygen 1.7.3
This website is maintained by Adriaan de Groot and Allen Winter.
KDE® and the K Desktop Environment® logo are registered trademarks of KDE e.V. | Legal