KDECore
nsSBCSGroupProber.cpp
Go to the documentation of this file.
00001 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 00002 /* -*- C++ -*- 00003 * Copyright (C) 1998 <developer@mozilla.org> 00004 * 00005 * 00006 * Permission is hereby granted, free of charge, to any person obtaining 00007 * a copy of this software and associated documentation files (the 00008 * "Software"), to deal in the Software without restriction, including 00009 * without limitation the rights to use, copy, modify, merge, publish, 00010 * distribute, sublicense, and/or sell copies of the Software, and to 00011 * permit persons to whom the Software is furnished to do so, subject to 00012 * the following conditions: 00013 * 00014 * The above copyright notice and this permission notice shall be included 00015 * in all copies or substantial portions of the Software. 00016 * 00017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00019 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 00020 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 00021 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 00022 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 00023 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00024 */ 00025 00026 #include "nsSBCSGroupProber.h" 00027 00028 #include "nsSBCharSetProber.h" 00029 #include "nsHebrewProber.h" 00030 #include "UnicodeGroupProber.h" 00031 00032 #include <stdio.h> 00033 #include <stdlib.h> 00034 00035 namespace kencodingprober { 00036 nsSBCSGroupProber::nsSBCSGroupProber() 00037 { 00038 mProbers[0] = new nsSingleByteCharSetProber(&Win1251Model); 00039 mProbers[1] = new nsSingleByteCharSetProber(&Koi8rModel); 00040 mProbers[2] = new nsSingleByteCharSetProber(&Latin5Model); 00041 mProbers[3] = new nsSingleByteCharSetProber(&MacCyrillicModel); 00042 mProbers[4] = new nsSingleByteCharSetProber(&Ibm866Model); 00043 mProbers[5] = new nsSingleByteCharSetProber(&Ibm855Model); 00044 mProbers[6] = new nsSingleByteCharSetProber(&Latin7Model); 00045 mProbers[7] = new nsSingleByteCharSetProber(&Win1253Model); 00046 mProbers[8] = new nsSingleByteCharSetProber(&Latin5BulgarianModel); 00047 mProbers[9] = new nsSingleByteCharSetProber(&Win1251BulgarianModel); 00048 00049 nsHebrewProber *hebprober = new nsHebrewProber(); 00050 // Notice: Any change in these indexes - 10,11,12 must be reflected 00051 // in the code below as well. 00052 mProbers[10] = hebprober; 00053 mProbers[11] = new nsSingleByteCharSetProber(&Win1255Model, false, hebprober); // Logical Hebrew 00054 mProbers[12] = new nsSingleByteCharSetProber(&Win1255Model, true, hebprober); // Visual Hebrew 00055 mProbers[13] = new UnicodeGroupProber(); 00056 00057 // Tell the Hebrew prober about the logical and visual probers 00058 if (mProbers[10] && mProbers[11] && mProbers[12]) // all are not null 00059 { 00060 hebprober->SetModelProbers(mProbers[11], mProbers[12]); 00061 } 00062 else // One or more is null. avoid any Hebrew probing, null them all 00063 { 00064 for (unsigned int i = 10; i <= 12; ++i) 00065 { 00066 delete mProbers[i]; 00067 mProbers[i] = 0; 00068 } 00069 } 00070 00071 // disable latin2 before latin1 is available, otherwise all latin1 00072 // will be detected as latin2 because of their similarity. 00073 //mProbers[10] = new nsSingleByteCharSetProber(&Latin2HungarianModel); 00074 //mProbers[11] = new nsSingleByteCharSetProber(&Win1250HungarianModel); 00075 00076 Reset(); 00077 } 00078 00079 nsSBCSGroupProber::~nsSBCSGroupProber() 00080 { 00081 for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) 00082 { 00083 delete mProbers[i]; 00084 } 00085 } 00086 00087 00088 const char* nsSBCSGroupProber::GetCharSetName() 00089 { 00090 //if we have no answer yet 00091 if (mBestGuess == -1) 00092 { 00093 GetConfidence(); 00094 //no charset seems positive 00095 if (mBestGuess == -1) 00096 //we will use default. 00097 mBestGuess = 0; 00098 } 00099 return mProbers[mBestGuess]->GetCharSetName(); 00100 } 00101 00102 void nsSBCSGroupProber::Reset(void) 00103 { 00104 mActiveNum = 0; 00105 for (unsigned int i = 0; i < NUM_OF_SBCS_PROBERS; i++) 00106 { 00107 if (mProbers[i]) // not null 00108 { 00109 mProbers[i]->Reset(); 00110 mIsActive[i] = true; 00111 ++mActiveNum; 00112 } 00113 else 00114 mIsActive[i] = false; 00115 } 00116 mBestGuess = -1; 00117 mState = eDetecting; 00118 } 00119 00120 00121 nsProbingState nsSBCSGroupProber::HandleData(const char* aBuf, unsigned int aLen) 00122 { 00123 nsProbingState st; 00124 unsigned int i; 00125 char *newBuf1 = 0; 00126 unsigned int newLen1 = 0; 00127 00128 //apply filter to original buffer, and we got new buffer back 00129 //depend on what script it is, we will feed them the new buffer 00130 //we got after applying proper filter 00131 //this is done without any consideration to KeepEnglishLetters 00132 //of each prober since as of now, there are no probers here which 00133 //recognize languages with English characters. 00134 if (!FilterWithoutEnglishLetters(aBuf, aLen, &newBuf1, newLen1)) 00135 goto done; 00136 00137 if (newLen1 == 0) 00138 goto done; // Nothing to see here, move on. 00139 00140 for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) 00141 { 00142 if (!mIsActive[i]) 00143 continue; 00144 st = mProbers[i]->HandleData(newBuf1, newLen1); 00145 if (st == eFoundIt) 00146 { 00147 mBestGuess = i; 00148 mState = eFoundIt; 00149 break; 00150 } 00151 else if (st == eNotMe) 00152 { 00153 mIsActive[i] = false; 00154 mActiveNum--; 00155 if (mActiveNum <= 0) 00156 { 00157 mState = eNotMe; 00158 break; 00159 } 00160 } 00161 } 00162 00163 done: 00164 free(newBuf1); 00165 00166 return mState; 00167 } 00168 00169 float nsSBCSGroupProber::GetConfidence(void) 00170 { 00171 unsigned int i; 00172 float bestConf = 0.0, cf; 00173 00174 switch (mState) 00175 { 00176 case eFoundIt: 00177 return (float)0.99; //sure yes 00178 case eNotMe: 00179 return (float)0.01; //sure no 00180 default: 00181 for (i = 0; i < NUM_OF_SBCS_PROBERS; ++i) 00182 { 00183 if (!mIsActive[i]) 00184 continue; 00185 cf = mProbers[i]->GetConfidence(); 00186 if (bestConf < cf) 00187 { 00188 bestConf = cf; 00189 mBestGuess = i; 00190 } 00191 } 00192 } 00193 return bestConf; 00194 } 00195 00196 #ifdef DEBUG_PROBE 00197 void nsSBCSGroupProber::DumpStatus() 00198 { 00199 unsigned int i; 00200 float cf; 00201 00202 cf = GetConfidence(); 00203 printf(" SBCS Group Prober --------begin status \r\n"); 00204 for (i = 0; i < NUM_OF_SBCS_PROBERS; i++) 00205 { 00206 if (!mIsActive[i]) 00207 printf(" inactive: [%s] (i.e. confidence is too low).\r\n", mProbers[i]->GetCharSetName()); 00208 else 00209 mProbers[i]->DumpStatus(); 00210 } 00211 printf(" SBCS Group found best match [%s] confidence %f.\r\n", 00212 mProbers[mBestGuess]->GetCharSetName(), cf); 00213 } 00214 #endif 00215 } 00216 00217
KDE 4.6 API Reference