KDECore
JpCntx.h
Go to the documentation of this file.
00001 /* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ 00002 /* -*- C++ -*- 00003 * Copyright (C) 1998 <developer@mozilla.org> 00004 * 00005 * 00006 * Permission is hereby granted, free of charge, to any person obtaining 00007 * a copy of this software and associated documentation files (the 00008 * "Software"), to deal in the Software without restriction, including 00009 * without limitation the rights to use, copy, modify, merge, publish, 00010 * distribute, sublicense, and/or sell copies of the Software, and to 00011 * permit persons to whom the Software is furnished to do so, subject to 00012 * the following conditions: 00013 * 00014 * The above copyright notice and this permission notice shall be included 00015 * in all copies or substantial portions of the Software. 00016 * 00017 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 00018 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 00019 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 00020 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 00021 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 00022 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 00023 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 00024 */ 00025 00026 #ifndef __JPCNTX_H__ 00027 #define __JPCNTX_H__ 00028 00029 #include "kdemacros.h" 00030 00031 #define NUM_OF_CATEGORY 6 00032 00033 #define ENOUGH_REL_THRESHOLD 100 00034 #define MAX_REL_THRESHOLD 1000 00035 namespace kencodingprober { 00036 //hiragana frequency category table 00037 extern const char jp2CharContext[83][83]; 00038 00039 class KDE_NO_EXPORT JapaneseContextAnalysis 00040 { 00041 public: 00042 JapaneseContextAnalysis() {Reset();}; 00043 virtual ~JapaneseContextAnalysis() {}; 00044 00045 void HandleData(const char* aBuf, unsigned int aLen); 00046 00047 void HandleOneChar(const char* aStr, unsigned int aCharLen) 00048 { 00049 int order; 00050 00051 //if we received enough data, stop here 00052 if (mTotalRel > MAX_REL_THRESHOLD) mDone = true; 00053 if (mDone) return; 00054 00055 //Only 2-bytes characters are of our interest 00056 order = (aCharLen == 2) ? GetOrder(aStr) : -1; 00057 if (order != -1 && mLastCharOrder != -1) 00058 { 00059 mTotalRel++; 00060 //count this sequence to its category counter 00061 mRelSample[(int)jp2CharContext[mLastCharOrder][order]]++; 00062 } 00063 mLastCharOrder = order; 00064 }; 00065 00066 float GetConfidence(); 00067 void Reset(void); 00068 void SetOpion(){}; 00069 bool GotEnoughData() {return mTotalRel > ENOUGH_REL_THRESHOLD;}; 00070 00071 protected: 00072 virtual int GetOrder(const char* str, unsigned int *charLen) = 0; 00073 virtual int GetOrder(const char* str) = 0; 00074 00075 //category counters, each interger counts sequence in its category 00076 unsigned int mRelSample[NUM_OF_CATEGORY]; 00077 00078 //total sequence received 00079 unsigned int mTotalRel; 00080 00081 //The order of previous char 00082 int mLastCharOrder; 00083 00084 //if last byte in current buffer is not the last byte of a character, we 00085 //need to know how many byte to skip in next buffer. 00086 unsigned int mNeedToSkipCharNum; 00087 00088 //If this flag is set to true, detection is done and conclusion has been made 00089 bool mDone; 00090 }; 00091 00092 00093 class KDE_NO_EXPORT SJISContextAnalysis : public JapaneseContextAnalysis 00094 { 00095 //SJISContextAnalysis(){}; 00096 protected: 00097 int GetOrder(const char* str, unsigned int *charLen); 00098 00099 int GetOrder(const char* str) 00100 { 00101 //We only interested in Hiragana, so first byte is '\202' 00102 if (*str == '\202' && 00103 (unsigned char)*(str+1) >= (unsigned char)0x9f && 00104 (unsigned char)*(str+1) <= (unsigned char)0xf1) 00105 return (unsigned char)*(str+1) - (unsigned char)0x9f; 00106 return -1; 00107 }; 00108 }; 00109 00110 class KDE_NO_EXPORT EUCJPContextAnalysis : public JapaneseContextAnalysis 00111 { 00112 protected: 00113 int GetOrder(const char* str, unsigned int *charLen); 00114 int GetOrder(const char* str) 00115 //We only interested in Hiragana, so first byte is '\244' 00116 { 00117 if (*str == '\244' && 00118 (unsigned char)*(str+1) >= (unsigned char)0xa1 && 00119 (unsigned char)*(str+1) <= (unsigned char)0xf3) 00120 return (unsigned char)*(str+1) - (unsigned char)0xa1; 00121 return -1; 00122 }; 00123 }; 00124 } 00125 #endif /* __JPCNTX_H__ */ 00126
KDE 4.6 API Reference