KDocTools
xslt.cpp
Go to the documentation of this file.
00001 #include "xslt.h" 00002 00003 #include <libxslt/xsltconfig.h> 00004 #include <libxslt/xsltInternals.h> 00005 #include <libxslt/transform.h> 00006 #include <libxslt/xsltutils.h> 00007 #include <libxml/xmlIO.h> 00008 #include <libxml/parserInternals.h> 00009 #include <libxml/catalog.h> 00010 #include <QtCore/QDate> 00011 #include <QtCore/QDir> 00012 #include <QtCore/QRegExp> 00013 #include <assert.h> 00014 #include <QtCore/QTextCodec> 00015 #include <stdlib.h> 00016 #include <stdarg.h> 00017 00018 #ifdef Q_OS_WIN 00019 #include <config-kdoctools.h> 00020 #include <QtCore/QCoreApplication> 00021 #include <QtCore/QDebug> 00022 #include <QtCore/QHash> 00023 #endif 00024 00025 #if !defined( SIMPLE_XSLT ) 00026 extern HelpProtocol *slave; 00027 #define INFO( x ) if (slave) slave->infoMessage(x); 00028 #else 00029 #define INFO( x ) 00030 #endif 00031 00032 int writeToQString(void * context, const char * buffer, int len) 00033 { 00034 QString *t = (QString*)context; 00035 *t += QString::fromUtf8(buffer, len); 00036 return len; 00037 } 00038 00039 int closeQString(void * context) { 00040 QString *t = (QString*)context; 00041 *t += '\n'; 00042 return 0; 00043 } 00044 00045 #if defined (SIMPLE_XSLT) && defined(Q_WS_WIN) 00046 00047 #define MAX_PATHS 64 00048 xmlExternalEntityLoader defaultEntityLoader = NULL; 00049 static xmlChar *paths[MAX_PATHS + 1]; 00050 static int nbpaths = 0; 00051 static QHash<QString,QString> replaceURLList; 00052 00053 /* 00054 * Entity loading control and customization. 00055 * taken from xsltproc.c 00056 */ 00057 static xmlParserInputPtr xsltprocExternalEntityLoader(const char *_URL, const char *ID,xmlParserCtxtPtr ctxt) 00058 { 00059 xmlParserInputPtr ret; 00060 warningSAXFunc warning = NULL; 00061 00062 // use local available dtd versions instead of fetching it everytime from the internet 00063 QString url = QLatin1String(_URL); 00064 QHash<QString, QString>::const_iterator i; 00065 for(i = replaceURLList.constBegin(); i != replaceURLList.constEnd(); i++) 00066 { 00067 if (url.startsWith(i.key())) 00068 { 00069 url.replace(i.key(),i.value()); 00070 qDebug() << "converted" << _URL << "to" << url; 00071 } 00072 } 00073 char URL[1024]; 00074 strcpy(URL,url.toLatin1().constData()); 00075 00076 const char *lastsegment = URL; 00077 const char *iter = URL; 00078 00079 if (nbpaths > 0) { 00080 while (*iter != 0) { 00081 if (*iter == '/') 00082 lastsegment = iter + 1; 00083 iter++; 00084 } 00085 } 00086 00087 if ((ctxt != NULL) && (ctxt->sax != NULL)) { 00088 warning = ctxt->sax->warning; 00089 ctxt->sax->warning = NULL; 00090 } 00091 00092 if (defaultEntityLoader != NULL) { 00093 ret = defaultEntityLoader(URL, ID, ctxt); 00094 if (ret != NULL) { 00095 if (warning != NULL) 00096 ctxt->sax->warning = warning; 00097 qDebug() << "Loaded URL=\"" << URL << "\" ID=\"" << ID << "\""; 00098 return(ret); 00099 } 00100 } 00101 for (int i = 0;i < nbpaths;i++) { 00102 xmlChar *newURL; 00103 00104 newURL = xmlStrdup((const xmlChar *) paths[i]); 00105 newURL = xmlStrcat(newURL, (const xmlChar *) "/"); 00106 newURL = xmlStrcat(newURL, (const xmlChar *) lastsegment); 00107 if (newURL != NULL) { 00108 ret = defaultEntityLoader((const char *)newURL, ID, ctxt); 00109 if (ret != NULL) { 00110 if (warning != NULL) 00111 ctxt->sax->warning = warning; 00112 qDebug() << "Loaded URL=\"" << newURL << "\" ID=\"" << ID << "\""; 00113 xmlFree(newURL); 00114 return(ret); 00115 } 00116 xmlFree(newURL); 00117 } 00118 } 00119 if (warning != NULL) { 00120 ctxt->sax->warning = warning; 00121 if (URL != NULL) 00122 warning(ctxt, "failed to load external entity \"%s\"\n", URL); 00123 else if (ID != NULL) 00124 warning(ctxt, "failed to load external entity \"%s\"\n", ID); 00125 } 00126 return(NULL); 00127 } 00128 #endif 00129 00130 QString transform( const QString &pat, const QString& tss, 00131 const QVector<const char *> ¶ms ) 00132 { 00133 QString parsed; 00134 00135 INFO(i18n("Parsing stylesheet")); 00136 #if defined (SIMPLE_XSLT) && defined(Q_WS_WIN) 00137 // prepare use of local available dtd versions instead of fetching everytime from the internet 00138 // this approach is url based 00139 defaultEntityLoader = xmlGetExternalEntityLoader(); 00140 xmlSetExternalEntityLoader(xsltprocExternalEntityLoader); 00141 00142 replaceURLList[QLatin1String("http://www.oasis-open.org/docbook/xml/4.2")] = QString("file:///%1").arg(DOCBOOK_XML_CURRDTD); 00143 #endif 00144 00145 xsltStylesheetPtr style_sheet = 00146 xsltParseStylesheetFile((const xmlChar *)tss.toLatin1().data()); 00147 00148 if ( !style_sheet ) { 00149 return parsed; 00150 } 00151 if (style_sheet->indent == 1) 00152 xmlIndentTreeOutput = 1; 00153 else 00154 xmlIndentTreeOutput = 0; 00155 00156 INFO(i18n("Parsing document")); 00157 00158 xmlDocPtr doc = xmlParseFile( pat.toLatin1() ); 00159 xsltTransformContextPtr ctxt; 00160 00161 ctxt = xsltNewTransformContext(style_sheet, doc); 00162 if (ctxt == NULL) 00163 return parsed; 00164 00165 INFO(i18n("Applying stylesheet")); 00166 QVector<const char *> p = params; 00167 p.append( NULL ); 00168 xmlDocPtr res = xsltApplyStylesheet(style_sheet, doc, const_cast<const char **>(&p[0])); 00169 xmlFreeDoc(doc); 00170 if (res != NULL) { 00171 xmlOutputBufferPtr outp = xmlOutputBufferCreateIO(writeToQString, (xmlOutputCloseCallback)closeQString, &parsed, 0); 00172 outp->written = 0; 00173 INFO(i18n("Writing document")); 00174 xsltSaveResultTo ( outp, res, style_sheet ); 00175 xmlOutputBufferFlush(outp); 00176 xmlFreeDoc(res); 00177 } 00178 xsltFreeStylesheet(style_sheet); 00179 00180 if (parsed.isEmpty()) 00181 parsed = ' '; // avoid error message 00182 return parsed; 00183 } 00184 00185 /* 00186 xmlParserInputPtr meinExternalEntityLoader(const char *URL, const char *ID, 00187 xmlParserCtxtPtr ctxt) { 00188 xmlParserInputPtr ret = NULL; 00189 00190 // fprintf(stderr, "loading %s %s %s\n", URL, ID, ctxt->directory); 00191 00192 if (URL == NULL) { 00193 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 00194 ctxt->sax->warning(ctxt, 00195 "failed to load external entity \"%s\"\n", ID); 00196 return(NULL); 00197 } 00198 if (!qstrcmp(ID, "-//OASIS//DTD DocBook XML V4.1.2//EN")) 00199 URL = "docbook/xml-dtd-4.1.2/docbookx.dtd"; 00200 if (!qstrcmp(ID, "-//OASIS//DTD XML DocBook V4.1.2//EN")) 00201 URL = "docbook/xml-dtd-4.1.2/docbookx.dtd"; 00202 00203 QString file; 00204 if (KStandardDirs::exists( QDir::currentPath() + "/" + URL ) ) 00205 file = QDir::currentPath() + "/" + URL; 00206 else 00207 file = locate("dtd", URL); 00208 00209 ret = xmlNewInputFromFile(ctxt, file.toLatin1().constData()); 00210 if (ret == NULL) { 00211 if ((ctxt->sax != NULL) && (ctxt->sax->warning != NULL)) 00212 ctxt->sax->warning(ctxt, 00213 00214 "failed to load external entity \"%s\"\n", URL); 00215 } 00216 return(ret); 00217 } 00218 */ 00219 00220 QString splitOut(const QString &parsed, int index) 00221 { 00222 int start_index = index + 1; 00223 while (parsed.at(start_index - 1) != '>') start_index++; 00224 00225 int inside = 0; 00226 00227 QString filedata; 00228 00229 while (true) { 00230 int endindex = parsed.indexOf("</FILENAME>", index); 00231 int startindex = parsed.indexOf("<FILENAME ", index) + 1; 00232 00233 // kDebug() << "FILENAME " << startindex << " " << endindex << " " << inside << " " << parsed.mid(startindex + 18, 15)<< " " << parsed.length(); 00234 00235 if (startindex > 0) { 00236 if (startindex < endindex) { 00237 // kDebug() << "finding another"; 00238 index = startindex + 8; 00239 inside++; 00240 } else { 00241 index = endindex + 8; 00242 inside--; 00243 } 00244 } else { 00245 inside--; 00246 index = endindex + 1; 00247 } 00248 00249 if (inside == 0) { 00250 filedata = parsed.mid(start_index, endindex - start_index); 00251 break; 00252 } 00253 00254 } 00255 00256 index = filedata.indexOf("<FILENAME "); 00257 00258 if (index > 0) { 00259 int endindex = filedata.lastIndexOf("</FILENAME>"); 00260 while (filedata.at(endindex) != '>') endindex++; 00261 endindex++; 00262 filedata = filedata.left(index) + filedata.mid(endindex); 00263 } 00264 00265 // filedata.replace(QRegExp(">"), "\n>"); 00266 return filedata; 00267 } 00268 00269 QByteArray fromUnicode( const QString &data ) 00270 { 00271 #ifdef Q_WS_WIN 00272 return data.toUtf8(); 00273 #else 00274 QTextCodec *locale = QTextCodec::codecForLocale(); 00275 QByteArray result; 00276 char buffer[30000]; 00277 uint buffer_len = 0; 00278 uint len = 0; 00279 int offset = 0; 00280 const int part_len = 5000; 00281 00282 QString part; 00283 00284 while ( offset < data.length() ) 00285 { 00286 part = data.mid( offset, part_len ); 00287 QByteArray test = locale->fromUnicode( part ); 00288 if ( locale->toUnicode( test ) == part ) { 00289 result += test; 00290 offset += part_len; 00291 continue; 00292 } 00293 len = part.length(); 00294 buffer_len = 0; 00295 for ( uint i = 0; i < len; i++ ) { 00296 QByteArray test = locale->fromUnicode( part.mid( i, 1 ) ); 00297 if ( locale->toUnicode( test ) == part.mid( i, 1 ) ) { 00298 if (buffer_len + test.length() + 1 > sizeof(buffer)) 00299 break; 00300 strcpy( buffer + buffer_len, test.data() ); 00301 buffer_len += test.length(); 00302 } else { 00303 QString res; 00304 res.sprintf( "&#%d;", part.at( i ).unicode() ); 00305 test = locale->fromUnicode( res ); 00306 if (buffer_len + test.length() + 1 > sizeof(buffer)) 00307 break; 00308 strcpy( buffer + buffer_len, test.data() ); 00309 buffer_len += test.length(); 00310 } 00311 } 00312 result += QByteArray( buffer, buffer_len + 1); 00313 offset += part_len; 00314 } 00315 return result; 00316 #endif 00317 } 00318 00319 void replaceCharsetHeader( QString &output ) 00320 { 00321 QString name; 00322 #ifdef Q_WS_WIN 00323 name = "utf-8"; 00324 // may be required for all xml output 00325 if (output.contains("<table-of-contents>")) 00326 output.replace( QString( "<?xml version=\"1.0\"?>" ), 00327 QString( "<?xml version=\"1.0\" encoding=\"%1\"?>").arg( name ) ); 00328 #else 00329 name = QTextCodec::codecForLocale()->name(); 00330 name.replace( QString( "ISO " ), "iso-" ); 00331 output.replace( QString( "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=utf-8\">" ), 00332 QString( "<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%1\">" ).arg( name ) ); 00333 #endif 00334 }
KDE 4.6 API Reference