libUnihan  0.5.3
Macros | Typedefs | Enumerations | Functions | Variables
Unihan_phonetic.h File Reference

Phonetic symbols (PinYin and ZhuYin) processing functions. More...

#include <sqlite3.h>

Go to the source code of this file.

Macros

#define PINYIN_MAX_LENGTH   9
 Maximum length of pinyin in bytes.
 
#define ZHUYIN_MAX_LENGTH   13
 Maximum length of zhuyin in bytes.
 
#define ZHUYIN_SYMBOL_COUNT   ZHUYIN_SYMBOL_NEUTRAL + 1
 Total number of supported ZhuYin symbols.
 

Typedefs

typedef gunichar ZhuYin_Symbol
 ZhuYin symbol.
 
typedef char ZhuYin
 Pronunciation in ZhuYin UTF-8 string.
 
typedef char PinYin
 Pronunciation in PinYin UTF-8 string.
 

Enumerations

enum  ZhuYin_Symbol_Id {
  ZHUYIN_INVALID_SYMBOL = -1, ZHUYIN_SYMBOL_B, ZHUYIN_SYMBOL_P, ZHUYIN_SYMBOL_M,
  ZHUYIN_SYMBOL_F, ZHUYIN_SYMBOL_D, ZHUYIN_SYMBOL_T, ZHUYIN_SYMBOL_N,
  ZHUYIN_SYMBOL_L, ZHUYIN_SYMBOL_G, ZHUYIN_SYMBOL_K, ZHUYIN_SYMBOL_H,
  ZHUYIN_SYMBOL_J, ZHUYIN_SYMBOL_Q, ZHUYIN_SYMBOL_X, ZHUYIN_SYMBOL_ZH,
  ZHUYIN_SYMBOL_CH, ZHUYIN_SYMBOL_SH, ZHUYIN_SYMBOL_R, ZHUYIN_SYMBOL_Z,
  ZHUYIN_SYMBOL_C, ZHUYIN_SYMBOL_S, ZHUYIN_SYMBOL_I, ZHUYIN_SYMBOL_U,
  ZHUYIN_SYMBOL_U_DIAERESIS, ZHUYIN_SYMBOL_A, ZHUYIN_SYMBOL_O, ZHUYIN_SYMBOL_E,
  ZHUYIN_SYMBOL_E_CIRCUMFLEX, ZHUYIN_SYMBOL_AI, ZHUYIN_SYMBOL_EI, ZHUYIN_SYMBOL_AO,
  ZHUYIN_SYMBOL_OU, ZHUYIN_SYMBOL_AN, ZHUYIN_SYMBOL_EN, ZHUYIN_SYMBOL_ANG,
  ZHUYIN_SYMBOL_ENG, ZHUYIN_SYMBOL_ER, ZHUYIN_SYMBOL_1, ZHUYIN_SYMBOL_2,
  ZHUYIN_SYMBOL_3, ZHUYIN_SYMBOL_4, ZHUYIN_SYMBOL_NEUTRAL
}
 Enumeration of ZhuYin symbols. More...
 
enum  PinYin_Accent_Format {
  PINYIN_ACCENT_ALWAYS, PINYIN_ACCENT_ORIGINAL, PINYIN_ACCENT_UNIHAN, PINYIN_ACCENT_TRAILING,
  PINYIN_ACCENT_INPUT_METHOD, PINYIN_ACCENT_NONE
}
 Enumeration of PinYin accent (not tone mark) handling modes. More...
 
enum  ZhuYin_ToneMark_Format { ZHUYIN_TONEMARK_ALWAYS, ZHUYIN_TONEMARK_ORIGINAL, ZHUYIN_TONEMARK_INPUT_METHOD, ZHUYIN_TONEMARK_NUMERICAL }
 Enumeration of ZhuYin tone mark handling. More...
 

Functions

PinYin * pinYin_new (const char *pinYin_str)
 New a PinYin instance. More...
 
guint pinYin_get_tone (const PinYin *pinYin)
 Return the explicit-specified tone of PinYin. More...
 
guint pinYin_strip_tone (PinYin *pinYin)
 Strip the tone mark of PinYin and return the explicit-specified tone Id. More...
 
void pinYin_add_tone (PinYin *pinYin, guint tone, gboolean useTrailNumber)
 Add the tone mark to pinYin. More...
 
PinYin * pinYin_convert_accent_format (const PinYin *pinYin, PinYin_Accent_Format toFormat, gboolean useTrailNumber)
 Convert a PinYin to a new accent format. Return a newly allocated PinYin instance which contains the converted content. More...
 
ZhuYin * pinYin_to_zhuYin (const PinYin *pinYin, ZhuYin_ToneMark_Format toFormat)
 PinYin to ZhuYin. More...
 
ZhuYin * zhuYin_new (const char *zhuYin_str)
 New a ZhuYin instance. More...
 
guint zhuYin_get_tone (const ZhuYin *zhuYin)
 Return the explicit-specified tone of ZhuYin. More...
 
guint zhuYin_strip_tone (ZhuYin *zhuYin)
 Strip the tone mark of ZhuYin and return the explicit-specified tone Id. More...
 
void zhuYin_add_tone (ZhuYin *zhuYin, guint tone, ZhuYin_ToneMark_Format toFormat)
 Add the tone mark to zhuYin. More...
 
ZhuYin * zhuYin_convert_toneMark_format (const ZhuYin *zhuYin, ZhuYin_ToneMark_Format toFormat)
 Convert zhuyin to another tone mark format. More...
 
PinYin * zhuYin_to_pinYin (const ZhuYin *zhuYin, PinYin_Accent_Format toFormat, gboolean useTrailNumber)
 ZhuYin to PinYin. More...
 
ZhuYin_Symbol zhuYin_Symbol_from_id (ZhuYin_Symbol_Id id)
 Return the ZhuYin symbol by its Id. More...
 
ZhuYin_Symbol_Id zhuYin_Symbol_get_id (ZhuYin_Symbol zSym)
 Return the Id of a ZhuYin symbol. More...
 
gboolean zhuYin_Symbol_is_initial (ZhuYin_Symbol zSym)
 Whether the zhuYin symbol is an initial. More...
 
gboolean zhuYin_Symbol_is_medial (ZhuYin_Symbol zSym)
 Whether the zhuYin symbol is a medial. More...
 
gboolean zhuYin_Symbol_is_final (ZhuYin_Symbol zSym)
 Whether the zhuYin symbol is a final. More...
 
gboolean zhuYin_Symbol_is_tone (ZhuYin_Symbol zSym)
 Whether the zhuYin symbol is either a toneMark or number which indicates the tone. More...
 
guint zhuYin_Symbol_to_toneMark_id (ZhuYin_Symbol zSym)
 Return the tone id of given tone mark. More...
 
ZhuYin_Symbol zhuYin_Symbol_from_toneMark_id (guint toneMark_id)
 Return the tone mark of given tone id. More...
 
void pinYin_convert_accent_format_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv)
 PinYin convert accent format scalar function for SQL command call. More...
 
void pinYin_to_zhuYin_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv)
 PinYin to ZhuYin converting scalar function for SQL command call. More...
 
void zhuYin_convert_toneMark_format_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv)
 ZhuYin convert tone mark format scalar function for SQL command call. More...
 
void zhuYin_to_pinYin_scalar_func (sqlite3_context *context, int argc, sqlite3_value **argv)
 ZhuYin to PinYin converting scalar function for SQL command call. More...
 

Variables

const ZhuYin_Symbol ZHUYIN_SYMBOL_LIST []
 An array of ZhuYin symbols.
 

Detailed Description

This header file lists the functions for PinYin and ZhuYin processing, such as C functions for conversion between HanYu PinYin and ZhuYin, and the corresponding SQL scalar functions.

PinYin is displayed and stored as uppercase, which is the same as kMandarin in Unihan.

Note that this header is included in Unihan.h, so there is no need to include it explicitly if Unihan.h is also included.

Enumeration Type Documentation

There are two PinYin symbols with accents: diaeresis U (Ü, ㄩ) and circumflex E (Ê, ㄝ). The accents are needed because their pronunciations are different from those of U and E.

In Romanization of Chinese (ISO 7098:1991), under certain circumstances, accents can be omitted, such as JÜ -> JU, QÜ -> QU. This is adopted in the PRC education system. Use PINYIN_ACCENT_ORIGINAL for this purpose.

The Unihan project does not use circumflex E (Ê); otherwise it is the same as ISO 7098. Use PINYIN_ACCENT_UNIHAN for the Unihan project.

Because Ü is not on most keyboards, there are various ways to represent Ü. For example, CEDICT uses U: (PINYIN_ACCENT_TRAILING), while most Chinese input methods use V as a substitute (PINYIN_ACCENT_INPUT_METHOD).

In English documents such as passports, the accents are usually ignored; use PINYIN_ACCENT_NONE for this purpose.

Preserving accents unconditionally makes conversion and education easier, use PINYIN_ACCENT_ALWAYS for this purpose.

Note that this enumeration is not for the pinyin tone mark. See pinYin_convert_accent_format() for pinyin tone mark handling.

See also
pinYin_convert_accent_format()
Enumerator
PINYIN_ACCENT_ALWAYS 

Ü is always represented as Ü, Ê is always represented as Ê.

PINYIN_ACCENT_ORIGINAL 

MOE CN standard ISO 7098:1991.

PINYIN_ACCENT_UNIHAN 

Ü is represented as Ü, Ê is represented as E.

PINYIN_ACCENT_TRAILING 

Ü is represented as U:, Ê is represented as E.

PINYIN_ACCENT_INPUT_METHOD 

Ü is represented as V, Ê is represented as E.

PINYIN_ACCENT_NONE 

Ü is represented as U, Ê is represented as E.

This enumeration lists the ZhuYin symbols, including the symbols for tone mark. Corresponding PinYin phonemes can also be located using these Ids.

See also
pinYin_phoneme_from_id()
pinYin_phoneme_get_id()
zhuYin_Symbol_from_id()
zhuYin_Symbol_get_id()
Enumerator
ZHUYIN_INVALID_SYMBOL 

Invalid ZhuYin Symbol.

ZHUYIN_SYMBOL_B 

ZhuYin symbol 'ㄅ'.

ZHUYIN_SYMBOL_P 

ZhuYin symbol 'ㄆ'.

ZHUYIN_SYMBOL_M 

ZhuYin symbol 'ㄇ'.

ZHUYIN_SYMBOL_F 

ZhuYin symbol 'ㄈ'.

ZHUYIN_SYMBOL_D 

ZhuYin symbol 'ㄉ'.

ZHUYIN_SYMBOL_T 

ZhuYin symbol 'ㄊ'.

ZHUYIN_SYMBOL_N 

ZhuYin symbol 'ㄋ'.

ZHUYIN_SYMBOL_L 

ZhuYin symbol 'ㄌ'.

ZHUYIN_SYMBOL_G 

ZhuYin symbol 'ㄍ'.

ZHUYIN_SYMBOL_K 

ZhuYin symbol 'ㄎ'.

ZHUYIN_SYMBOL_H 

ZhuYin symbol 'ㄏ'.

ZHUYIN_SYMBOL_J 

ZhuYin symbol 'ㄐ'.

ZHUYIN_SYMBOL_Q 

ZhuYin symbol 'ㄑ'.

ZHUYIN_SYMBOL_X 

ZhuYin symbol 'ㄒ'.

ZHUYIN_SYMBOL_ZH 

ZhuYin symbol 'ㄓ'.

ZHUYIN_SYMBOL_CH 

ZhuYin symbol 'ㄔ'.

ZHUYIN_SYMBOL_SH 

ZhuYin symbol 'ㄕ'.

ZHUYIN_SYMBOL_R 

ZhuYin symbol 'ㄖ'.

ZHUYIN_SYMBOL_Z 

ZhuYin symbol 'ㄗ'.

ZHUYIN_SYMBOL_C 

ZhuYin symbol 'ㄘ'.

ZHUYIN_SYMBOL_S 

ZhuYin symbol 'ㄙ'.

ZHUYIN_SYMBOL_I 

ZhuYin symbol 'ㄧ'.

ZHUYIN_SYMBOL_U 

ZhuYin symbol 'ㄨ'.

ZHUYIN_SYMBOL_U_DIAERESIS 

ZhuYin symbol 'ㄩ'.

ZHUYIN_SYMBOL_A 

ZhuYin symbol 'ㄚ'.

ZHUYIN_SYMBOL_O 

ZhuYin symbol 'ㄛ'.

ZHUYIN_SYMBOL_E 

ZhuYin symbol 'ㄜ'.

ZHUYIN_SYMBOL_E_CIRCUMFLEX 

ZhuYin symbol 'ㄝ'.

ZHUYIN_SYMBOL_AI 

ZhuYin symbol 'ㄞ'.

ZHUYIN_SYMBOL_EI 

ZhuYin symbol 'ㄟ'.

ZHUYIN_SYMBOL_AO 

ZhuYin symbol 'ㄠ'.

ZHUYIN_SYMBOL_OU 

ZhuYin symbol 'ㄡ'.

ZHUYIN_SYMBOL_AN 

ZhuYin symbol 'ㄢ'.

ZHUYIN_SYMBOL_EN 

ZhuYin symbol 'ㄣ'.

ZHUYIN_SYMBOL_ANG 

ZhuYin symbol 'ㄤ'.

ZHUYIN_SYMBOL_ENG 

ZhuYin symbol 'ㄥ'.

ZHUYIN_SYMBOL_ER 

ZhuYin symbol 'ㄦ'.

ZHUYIN_SYMBOL_1 

ZhuYin 1st tone mark 'ˉ'.

ZHUYIN_SYMBOL_2 

ZhuYin 2nd tone mark 'ˊ'.

ZHUYIN_SYMBOL_3 

ZhuYin 3rd tone mark 'ˇ'.

ZHUYIN_SYMBOL_4 

ZhuYin 4th tone mark 'ˋ'.

ZHUYIN_SYMBOL_NEUTRAL 

ZhuYin neutral (5th) tone mark '˙'.

Originally, the neutral (fifth) tone mark of zhuyin is put in the front, while the first tone mark is omitted. Use ZHUYIN_TONEMARK_ORIGINAL for this.

However, for Zhuyin based input method, the neutral is put in the back. Use ZHUYIN_TONEMARK_INPUT_METHOD for this.

If preserving the tone mark is desirable, then use ZHUYIN_TONEMARK_ALWAYS, in which the first tone mark is not omitted; otherwise it is the same as ZHUYIN_TONEMARK_INPUT_METHOD.

If numerical tone mark is desired, use ZHUYIN_TONEMARK_NUMERICAL.

See also
zhuYin_convert_toneMark_format()
Enumerator
ZHUYIN_TONEMARK_ALWAYS 

Neutral (fifth) tone mark is put in the end, while the first tone mark is kept.

ZHUYIN_TONEMARK_ORIGINAL 

Neutral (fifth) tone mark is put in the front, while the first tone mark is omitted.

ZHUYIN_TONEMARK_INPUT_METHOD 

Neutral (fifth) tone mark is put in the end, while the first tone mark is omitted.

ZHUYIN_TONEMARK_NUMERICAL 

Tone marks are represented as numbers, at the end of Zhuyin.

Function Documentation

void pinYin_add_tone ( PinYin * pinYin,
guint  tone,
gboolean  useTrailNumber 
)

This function adds a tone mark to pinYin; any existing tone will be removed before adding the new tone. If tone is 0, then the existing tone will be removed, but no new tone will be added.

The result will be stored in pinYin, so backup it with strdup() or g_strdup() to keep the original.

Parameters
pinYin: the pinYin instance to be processed.
tone: the tone to be added.
useTrailNumber: TRUE if a trailing number is preferred, FALSE to use the traditional tone mark.
PinYin* pinYin_convert_accent_format ( const PinYin * pinYin,
PinYin_Accent_Format  toFormat,
gboolean  useTrailNumber 
)

Unlike pinYin_get_tone() and pinYin_strip_tone(), which only identify the explicit-specified tone, this function treats the unspecified tone as the 5th tone, unless the SQL wildcard characters '%' and '_' are encountered.

Use g_free to free the newly allocated instance.

Parameters
pinYin: the PinYin to be converted.
toFormat: the PinYin accent mode to be converted to.
useTrailNumber: TRUE if a trailing number is preferred, FALSE to use the traditional tone mark.
Returns
a newly allocated converted PinYin instance.
See also
zhuYin_to_pinYin()
void pinYin_convert_accent_format_scalar_func ( sqlite3_context *  context,
int  argc,
sqlite3_value **  argv 
)

This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.

Parameters
context: The sqlite3_context.
argc: Number of arguments expected.
argv: Arguments for this scalar function.
guint pinYin_get_tone ( const PinYin * pinYin)

This function finds and returns the explicit-specified tone of pinYin. Thus 0 will be returned if pinYin does not have any explicit-specified tone.

This function acts this way in order to accommodate SQL LIKE queries such as ... WHERE kMandarin LIKE 'KE%'.

Sometimes the 5th tone mark is omitted; please convert the value 0 to 5 if this is the case.

Parameters
pinYin: the pinYin instance to be examined.
Returns
the tone id from 1 to 5 if the tone is explicit-specified, 0 otherwise.
See also
pinYin_strip_tone()
PinYin* pinYin_new ( const char *  pinYin_str)

This function allocates a new PinYin instance. Non-NULL pinYin_str will be copied to the newly allocated PinYin instance and converted to uppercase. Note that the PinYin instance only holds PINYIN_MAX_LENGTH bytes, including the EOL ('\0') character. Longer pinYin will be truncated.

Note: use g_free to free the newly allocated instance.

Parameters
pinYin_strthe PinYin in string, NULL for blank instance.
Returns
a new PinYin instance.
guint pinYin_strip_tone ( PinYin * pinYin)

This function strips the tone mark of pinYin, otherwise is similar to pinYin_get_tone().

Parameters
pinYinthe pinYin instance to be stripped.
Returns
the tone id from 1 to 5 if the tone is explicit-specified, 0 otherwise.
See also
pinYin_get_tone()
ZhuYin* pinYin_to_zhuYin ( const PinYin * pinYin,
ZhuYin_ToneMark_Format  toFormat 
)
Parameters
pinYinthe PinYin to be converted.
toFormatthe ZhuYin tone mark mode.
Returns
a newly allocated ZhuYin instance.
See also
zhuYin_convert_toneMark_format()
void pinYin_to_zhuYin_scalar_func ( sqlite3_context *  context,
int  argc,
sqlite3_value **  argv 
)

This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.

Parameters
context: The sqlite3_context.
argc: Number of arguments expected.
argv: Arguments for this scalar function.
void zhuYin_add_tone ( ZhuYin * zhuYin,
guint  tone,
ZhuYin_ToneMark_Format  toFormat 
)

This function adds a tone mark to zhuYin; any existing tone will be removed before adding the new tone. If tone is 0, then the existing tone will be removed, but no new tone will be added.

The result will be stored in zhuYin, so backup it with strdup() or g_strdup() to keep the original.

Parameters
zhuYinthe zhuYin instance to be processed.
tonethe tone to be added.
toFormatthe ZhuYin tone mark mode to be converted to.
ZhuYin* zhuYin_convert_toneMark_format ( const ZhuYin * zhuYin,
ZhuYin_ToneMark_Format  toFormat 
)

Unlike zhuYin_get_tone() and zhuYin_strip_tone(), which only identify the explicit-specified tone, this function treats the unspecified tone as the 1st tone, unless the SQL wildcard characters '%' and '_' are encountered.

Note: use g_free to free the newly allocated instance.

Parameters
zhuYinthe ZhuYin to be converted.
toFormatthe ZhuYin tone mark mode to be converted to.
Returns
the newly allocated ZhuYin instance that contains the converted content.
See also
pinYin_to_zhuYin()
void zhuYin_convert_toneMark_format_scalar_func ( sqlite3_context *  context,
int  argc,
sqlite3_value **  argv 
)

This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.

Parameters
context: The sqlite3_context.
argc: Number of arguments expected.
argv: Arguments for this scalar function.
guint zhuYin_get_tone ( const ZhuYin * zhuYin)

This function finds and returns the explicit-specified tone of zhuYin. Thus 0 will be returned if zhuYin does not have any explicit-specified tone.

This function acts this way in order to accommodate SQL LIKE queries such as ... WHERE zhuYin LIKE 'ㄊㄧㄢ%'.

Sometimes the 1st tone mark is omitted; please convert the value 0 to 1 if this is the case.

Parameters
zhuYin: the zhuYin instance to be examined.
Returns
the tone id from 1 to 5 if the tone is explicit-specified, 0 otherwise.
See also
zhuYin_strip_tone()
ZhuYin* zhuYin_new ( const char *  zhuYin_str)

This function allocates a new ZhuYin instance. Non-NULL zhuYin_str will be copied to the new ZhuYin instance and converted to uppercase. Note that the ZhuYin instance only holds ZHUYIN_MAX_LENGTH bytes, including the EOL ('\0') character. Longer zhuYin will be truncated.

Note: use g_free to free the newly allocated instance.

Parameters
zhuYin_strthe ZhuYin in string, NULL for blank instance.
Returns
a new ZhuYin instance.
guint zhuYin_strip_tone ( ZhuYin * zhuYin)

This function strips the tone mark of zhuYin, otherwise is similar to zhuYin_get_tone().

Parameters
zhuYinthe zhuYin instance to be stripped.
Returns
the tone id from 1 to 5 if the tone is explicit-specified, 0 otherwise.
See also
zhuYin_get_tone()
ZhuYin_Symbol zhuYin_Symbol_from_id ( ZhuYin_Symbol_Id  id)
Parameters
idZhuYin symbol Id.
Returns
the corresponding symbol, or 0 if the id is negative.
ZhuYin_Symbol zhuYin_Symbol_from_toneMark_id ( guint  toneMark_id)
Parameters
toneMark_id: the tone mark id (1 to 5).
Returns
the ZhuYin symbol if id is between 1 to 5; returns 0 otherwise.
ZhuYin_Symbol_Id zhuYin_Symbol_get_id ( ZhuYin_Symbol  zSym)
Parameters
zSymZhuYin symbol.
Returns
the corresponding Id.
gboolean zhuYin_Symbol_is_final ( ZhuYin_Symbol  zSym)
Parameters
zSymZhuYin symbol.
Returns
TRUE if the zhuYin symbol is a final, FALSE otherwise.
gboolean zhuYin_Symbol_is_initial ( ZhuYin_Symbol  zSym)
Parameters
zSymZhuYin symbol.
Returns
TRUE if the zhuYin symbol is an initial, FALSE otherwise.
gboolean zhuYin_Symbol_is_medial ( ZhuYin_Symbol  zSym)
Parameters
zSymZhuYin symbol.
Returns
TRUE if the zhuYin symbol is a medial, FALSE otherwise.
gboolean zhuYin_Symbol_is_tone ( ZhuYin_Symbol  zSym)
Parameters
zSymZhuYin symbol.
Returns
TRUE if the zhuYin symbol is either a toneMark or number which indicates the tone, FALSE otherwise.
guint zhuYin_Symbol_to_toneMark_id ( ZhuYin_Symbol  zSym)
Parameters
zSymZhuYin symbol.
Returns
tone id if zSym is tone mark, 0 otherwise.
PinYin* zhuYin_to_pinYin ( const ZhuYin * zhuYin,
PinYin_Accent_Format  toFormat,
gboolean  useTrailNumber 
)
Parameters
zhuYin: the ZhuYin to be converted.
toFormat: the PinYin accent mode.
useTrailNumber: TRUE if a trailing number is preferred, FALSE to use the traditional tone mark.
Returns
a newly allocated PinYin instance.
See also
pinYin_convert_accent_format()
void zhuYin_to_pinYin_scalar_func ( sqlite3_context *  context,
int  argc,
sqlite3_value **  argv 
)

This function is meant to be called by sqlite3_create_function() and used in SQL command. Do not use it directly.

Parameters
context: The sqlite3_context.
argc: Number of arguments expected.
argv: Arguments for this scalar function.