KDECore
nsMBCSGroupProber.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 00002 /* -*- C++ -*- 00003 * Copyright (C) 1998 <developer@mozilla.org> 00004 * 00005 * 00006 * Permission is hereby granted, free of charge, to any person obtaining 00007 * a copy of this software and associated documentation files (the 00008 * "Software"), to deal in the Software without restriction, including 00009 * without limitation the rights to use, copy, modify, merge, publish, 00010 * distribute, sublicense, and/or sell copies of the Software, and to 00011 * permit persons to whom the Software is furnished to do so, subject to 00012 * the following conditions: 00013 * 00014 * The above copyright notice and this permission notice shall be included 00015 * in all copies or substantial portions of the Software. 00016 * 00017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00019 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 00020 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 00021 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 00022 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 00023 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00024 */ 00025 00026 #include "nsMBCSGroupProber.h" 00027 00028 #include <stdio.h> 00029 #include <stdlib.h> 00030 00031 namespace kencodingprober { 00032 #ifdef DEBUG_PROBE 00033 static const char* const ProberName[] = 00034 { 00035 "Unicode", 00036 "SJIS", 00037 "EUCJP", 00038 "GB18030", 00039 "EUCKR", 00040 "Big5", 00041 "EUCTW", 00042 }; 00043 00044 #endif 00045 00046 nsMBCSGroupProber::nsMBCSGroupProber() 00047 { 00048 mProbers[0] = new UnicodeGroupProber(); 00049 mProbers[1] = new nsSJISProber(); 00050 mProbers[2] = new nsEUCJPProber(); 00051 mProbers[3] = new nsGB18030Prober(); 00052 mProbers[4] = new nsEUCKRProber(); 00053 mProbers[5] = new nsBig5Prober(); 00054 mProbers[6] = new nsEUCTWProber(); 00055 Reset(); 00056 } 00057 00058 nsMBCSGroupProber::~nsMBCSGroupProber() 00059 { 00060 for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) 00061 { 00062 delete mProbers[i]; 00063 } 00064 } 00065 00066 const char* nsMBCSGroupProber::GetCharSetName() 00067 { 00068 if (mBestGuess == -1) 00069 { 00070 GetConfidence(); 00071 if (mBestGuess == -1) 00072 mBestGuess = 0; 00073 } 00074 return mProbers[mBestGuess]->GetCharSetName(); 00075 } 00076 00077 void nsMBCSGroupProber::Reset(void) 00078 { 00079 mActiveNum = 0; 00080 for (unsigned int i = 0; i < NUM_OF_PROBERS; i++) 00081 { 00082 if (mProbers[i]) 00083 { 00084 mProbers[i]->Reset(); 00085 mIsActive[i] = true; 00086 ++mActiveNum; 00087 } 00088 else 00089 mIsActive[i] = false; 00090 } 00091 mBestGuess = -1; 00092 mState = eDetecting; 00093 } 00094 00095 nsProbingState nsMBCSGroupProber::HandleData(const char* aBuf, unsigned int aLen) 00096 { 00097 nsProbingState st; 00098 unsigned int i; 00099 00100 //do filtering to reduce load to probers 00101 char *highbyteBuf; 00102 char *hptr; 00103 bool keepNext = true; //assume previous is not ascii, it will do no harm except add some noise 00104 hptr = highbyteBuf = (char*)malloc(aLen); 00105 if (!hptr) 00106 return mState; 00107 for (i = 0; i < aLen; ++i) 00108 { 00109 if (aBuf[i] & 0x80) 00110 { 00111 *hptr++ = aBuf[i]; 00112 keepNext = true; 00113 } 00114 else 00115 { 00116 //if previous is highbyte, keep this even it is a ASCII 00117 if (keepNext) 00118 { 00119 *hptr++ = aBuf[i]; 00120 keepNext = false; 00121 } 00122 } 00123 } 00124 00125 for (i = 0; i < NUM_OF_PROBERS; ++i) 00126 { 00127 if (!mIsActive[i]) 00128 continue; 00129 st = mProbers[i]->HandleData(highbyteBuf, hptr - highbyteBuf); 00130 if (st == eFoundIt) 00131 { 00132 mBestGuess = i; 00133 mState = eFoundIt; 00134 break; 00135 } 00136 else if (st == eNotMe) 00137 { 00138 mIsActive[i] = false; 00139 mActiveNum--; 00140 if (mActiveNum <= 0) 00141 { 00142 mState = eNotMe; 00143 break; 00144 } 00145 } 00146 } 00147 00148 free(highbyteBuf); 00149 00150 return mState; 00151 } 00152 00153 float nsMBCSGroupProber::GetConfidence(void) 00154 { 00155 unsigned int i; 00156 float bestConf = 0.0, cf; 00157 00158 switch (mState) 00159 { 00160 case eFoundIt: 00161 return (float)0.99; 00162 case eNotMe: 00163 return (float)0.01; 00164 default: 00165 for (i = 0; i < NUM_OF_PROBERS; ++i) 00166 { 00167 if (!mIsActive[i]) 00168 continue; 00169 cf = mProbers[i]->GetConfidence(); 00170 if (bestConf < cf) 00171 { 00172 bestConf = cf; 00173 mBestGuess = i; 00174 } 00175 } 00176 } 00177 return bestConf; 00178 } 00179 00180 #ifdef DEBUG_PROBE 00181 void nsMBCSGroupProber::DumpStatus() 00182 { 00183 unsigned int i; 00184 float cf; 00185 00186 GetConfidence(); 00187 for (i = 0; i < NUM_OF_PROBERS; i++) 00188 { 00189 if (!mIsActive[i]) 00190 printf(" MBCS inactive: [%s] (confidence is too low).\r\n", ProberName[i]); 00191 else 00192 { 00193 cf = mProbers[i]->GetConfidence(); 00194 printf(" MBCS %1.3f: [%s]\r\n", cf, ProberName[i]); 00195 } 00196 } 00197 } 00198 #endif 00199 } 00200 00201
KDE 4.6 API Reference