00001
00002
00003
00004
00005
00006
00007
00008
00009 #ifndef MECAB_MECAB_H
00010 #define MECAB_MECAB_H
00011
00012
/*
 * Record describing one dictionary.
 *
 * The per-field notes are inferred from the field names and types only;
 * confirm them against the MeCab documentation before relying on them.
 * Field order and types are part of the binary layout and must not change.
 */
struct mecab_dictionary_info_t {
  const char                     *filename;  /* presumably the dictionary file name */
  const char                     *charset;   /* presumably the character set name */
  unsigned int                    size;      /* unsigned size/count value */
  int                             type;      /* presumably MECAB_SYS_DIC, MECAB_USR_DIC or MECAB_UNK_DIC */
  unsigned int                    lsize;     /* "l" size; pairs with rsize */
  unsigned int                    rsize;     /* "r" size; pairs with lsize */
  unsigned short                  version;   /* version number */
  struct mecab_dictionary_info_t *next;      /* another record of the same type */
};
00023
/*
 * Link between two mecab_node_t objects.
 *
 * Each path refers to one node through `rnode` and one through `lnode`,
 * and to two further paths through `rnext` / `lnext`.
 * NOTE(review): "r"/"l" presumably mean right/left; confirm.
 * Field order and types are part of the binary layout.
 */
struct mecab_path_t {
  struct mecab_node_t *rnode;  /* node on the "r" side */
  struct mecab_path_t *rnext;  /* further path on the "r" side */
  struct mecab_node_t *lnode;  /* node on the "l" side */
  struct mecab_path_t *lnext;  /* further path on the "l" side */
  int                  cost;   /* integer cost of this path */
  float                prob;   /* presumably a probability; confirm */
};
00032
/*
 * Link between two mecab_learner_node_t objects.
 *
 * Same pointer members as mecab_path_t, but `cost` is a double and the
 * record carries a read-only int array (`fvector`) instead of `prob`.
 * NOTE(review): fvector is presumably a feature-index vector; its length
 * or terminator is not visible in this header; confirm.
 */
struct mecab_learner_path_t {
  struct mecab_learner_node_t *rnode;    /* node on the "r" side */
  struct mecab_learner_path_t *rnext;    /* further path on the "r" side */
  struct mecab_learner_node_t *lnode;    /* node on the "l" side */
  struct mecab_learner_path_t *lnext;    /* further path on the "l" side */
  double                       cost;     /* floating-point cost of this path */
  const int                   *fvector;  /* read-only int array */
};
00041
/*
 * Compact fixed-width token record.
 *
 * Three unsigned 16-bit-style ids, a signed short cost and two unsigned
 * int values. The per-field notes are inferred from names only; confirm.
 * Field order and types are part of the binary layout.
 */
struct mecab_token_t {
  unsigned short lcAttr;    /* "lc" attribute id */
  unsigned short rcAttr;    /* "rc" attribute id */
  unsigned short posid;     /* presumably a part-of-speech id */
  short          wcost;     /* signed word cost */
  unsigned int   feature;   /* unsigned value; presumably an index/offset for feature data */
  unsigned int   compound;  /* unsigned value; meaning not visible here */
};
00050
/*
 * Node record exposed by the parsing API (see the functions returning
 * `const mecab_node_t *`).
 *
 * The per-field notes are inferred from names and types only; confirm
 * them against the MeCab documentation. Field order and types are part
 * of the binary layout and must not change.
 */
struct mecab_node_t {
  /* Links to other nodes. */
  struct mecab_node_t  *prev;             /* previous node */
  struct mecab_node_t  *next;             /* next node */
  struct mecab_node_t  *enext;            /* "e" chain; presumably nodes sharing an end position */
  struct mecab_node_t  *bnext;            /* "b" chain; presumably nodes sharing a begin position */

  /* Links to paths (struct mecab_path_t). */
  struct mecab_path_t  *rpath;            /* path on the "r" side */
  struct mecab_path_t  *lpath;            /* path on the "l" side */

  /* Arrays of node pointers. */
  struct mecab_node_t **begin_node_list;
  struct mecab_node_t **end_node_list;

  /* Text. */
  const char           *surface;          /* NOTE(review): possibly not NUL-terminated; see `length` */
  const char           *feature;          /* feature string */

  /* Identifiers and sizes. */
  unsigned int          id;
  unsigned short        length;           /* presumably the byte length of `surface` */
  unsigned short        rlength;          /* second length value; exact meaning not visible here */
  unsigned short        rcAttr;
  unsigned short        lcAttr;
  unsigned short        posid;
  unsigned char         char_type;
  unsigned char         stat;             /* presumably one of the MECAB_*_NODE values */
  unsigned char         isbest;           /* flag-like byte */
  unsigned int          sentence_length;

  /* Scores. */
  float                 alpha;
  float                 beta;
  float                 prob;
  short                 wcost;            /* signed word cost */
  long                  cost;             /* signed cost, wider than wcost */

  struct mecab_token_t *token;            /* associated token record */
};
00079
00080
00081
/*
 * Learner-side counterpart of mecab_node_t.
 *
 * Differences visible in this header: paths are mecab_learner_path_t,
 * there is an extra `anext` link, alpha/beta/wcost/cost are doubles,
 * the short word cost is kept as `wcost2`, and an `fvector` int array is
 * attached. There are no begin/end node lists, no sentence_length and no
 * prob. Other notes are inferred from names only; confirm.
 * Field order and types are part of the binary layout.
 */
struct mecab_learner_node_t {
  /* Links to other learner nodes. */
  struct mecab_learner_node_t *prev;
  struct mecab_learner_node_t *next;
  struct mecab_learner_node_t *enext;
  struct mecab_learner_node_t *bnext;

  /* Links to learner paths. */
  struct mecab_learner_path_t *rpath;
  struct mecab_learner_path_t *lpath;

  struct mecab_learner_node_t *anext;     /* additional node chain; purpose not visible here */

  /* Text. */
  const char                  *surface;
  const char                  *feature;

  /* Identifiers and sizes. */
  unsigned int                 id;
  unsigned short               length;
  unsigned short               rlength;
  unsigned short               rcAttr;
  unsigned short               lcAttr;
  unsigned short               posid;
  unsigned char                char_type;
  unsigned char                stat;      /* presumably one of the MECAB_*_NODE values */
  unsigned char                isbest;

  /* Scores. */
  double                       alpha;
  double                       beta;
  short                        wcost2;    /* short-typed cost kept alongside the double `wcost` */
  double                       wcost;
  double                       cost;

  const int                   *fvector;   /* read-only int array */
  struct mecab_token_t        *token;     /* associated token record */
};
00109
/*
 * Node kinds.
 * NOTE(review): presumably the values stored in the `stat` member of the
 * node structs; confirm. Kept as macros so `#ifdef` checks keep working.
 */
#define MECAB_NOR_NODE  0
#define MECAB_UNK_NODE  1
#define MECAB_BOS_NODE  2
#define MECAB_EOS_NODE  3

/*
 * Dictionary kinds, listed in numeric order.
 * NOTE(review): presumably the values of mecab_dictionary_info_t::type;
 * confirm.
 */
#define MECAB_SYS_DIC  0
#define MECAB_USR_DIC  1
#define MECAB_UNK_DIC  2
00118
00119
00120 #ifdef __cplusplus
00121 #include <cstdio>
00122 #else
00123 #include <stdio.h>
00124 #endif
00125
00126 #ifdef __cplusplus
00127 extern "C" {
00128 #endif
00129
/*
 * MECAB_DLL_EXTERN: storage/linkage prefix placed on every exported
 * declaration in this header.
 *
 *  - On _WIN32 it is __declspec(dllexport) when DLL_EXPORT is defined
 *    and __declspec(dllimport) otherwise.
 *  - Everywhere else it falls back to plain `extern`.
 */
#if defined(_WIN32)
#include <windows.h>
#  if defined(DLL_EXPORT)
#    define MECAB_DLL_EXTERN  __declspec(dllexport)
#  else
#    define MECAB_DLL_EXTERN  __declspec(dllimport)
#  endif
#endif

#if !defined(MECAB_DLL_EXTERN)
#  define MECAB_DLL_EXTERN  extern
#endif
00142
/* Opaque handle: struct mecab_t is not defined in this header. */
typedef struct mecab_t                  mecab_t;

/* Tag-free names for the struct types used by the C API. */
typedef struct mecab_dictionary_info_t  mecab_dictionary_info_t;
typedef struct mecab_token_t            mecab_token_t;
typedef struct mecab_node_t             mecab_node_t;
typedef struct mecab_path_t             mecab_path_t;
typedef struct mecab_learner_node_t     mecab_learner_node_t;
typedef struct mecab_learner_path_t     mecab_learner_path_t;
00150
00151 #ifndef SWIG
00152
00153 MECAB_DLL_EXTERN int mecab_do (int argc, char **argv);
00154
00155 MECAB_DLL_EXTERN mecab_t* mecab_new(int argc, char **argv);
00156 MECAB_DLL_EXTERN mecab_t* mecab_new2(const char *arg);
00157 MECAB_DLL_EXTERN const char* mecab_version();
00158 MECAB_DLL_EXTERN const char* mecab_strerror(mecab_t *mecab);
00159 MECAB_DLL_EXTERN void mecab_destroy(mecab_t *mecab);
00160
00161 MECAB_DLL_EXTERN int mecab_get_partial(mecab_t *mecab);
00162 MECAB_DLL_EXTERN void mecab_set_partial(mecab_t *mecab, int partial);
00163 MECAB_DLL_EXTERN float mecab_get_theta(mecab_t *mecab);
00164 MECAB_DLL_EXTERN void mecab_set_theta(mecab_t *mecab, float theta);
00165 MECAB_DLL_EXTERN int mecab_get_lattice_level(mecab_t *mecab);
00166 MECAB_DLL_EXTERN void mecab_set_lattice_level(mecab_t *mecab, int level);
00167 MECAB_DLL_EXTERN int mecab_get_all_morphs(mecab_t *mecab);
00168 MECAB_DLL_EXTERN void mecab_set_all_morphs(mecab_t *mecab, int all_morphs);
00169
00170 MECAB_DLL_EXTERN const char* mecab_sparse_tostr(mecab_t *mecab, const char *str);
00171 MECAB_DLL_EXTERN const char* mecab_sparse_tostr2(mecab_t *mecab, const char *str, size_t len);
00172 MECAB_DLL_EXTERN char* mecab_sparse_tostr3(mecab_t *mecab, const char *str, size_t len,
00173 char *ostr, size_t olen);
00174 MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode(mecab_t *mecab, const char*);
00175 MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode2(mecab_t *mecab, const char*, size_t);
00176 MECAB_DLL_EXTERN const char* mecab_nbest_sparse_tostr(mecab_t *mecab, size_t N, const char *str);
00177 MECAB_DLL_EXTERN const char* mecab_nbest_sparse_tostr2(mecab_t *mecab, size_t N,
00178 const char *str, size_t len);
00179 MECAB_DLL_EXTERN char* mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N,
00180 const char *str, size_t len,
00181 char *ostr, size_t olen);
00182 MECAB_DLL_EXTERN int mecab_nbest_init(mecab_t *mecab, const char *str);
00183 MECAB_DLL_EXTERN int mecab_nbest_init2(mecab_t *mecab, const char *str, size_t len);
00184 MECAB_DLL_EXTERN const char* mecab_nbest_next_tostr(mecab_t *mecab);
00185 MECAB_DLL_EXTERN char* mecab_nbest_next_tostr2(mecab_t *mecab, char *ostr, size_t olen);
00186 MECAB_DLL_EXTERN const mecab_node_t* mecab_nbest_next_tonode(mecab_t *mecab);
00187 MECAB_DLL_EXTERN const char* mecab_format_node(mecab_t *mecab, const mecab_node_t *node);
00188 MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_dictionary_info(mecab_t *mecab);
00189 MECAB_DLL_EXTERN int mecab_dict_index(int argc, char **argv);
00190 MECAB_DLL_EXTERN int mecab_dict_gen(int argc, char **argv);
00191 MECAB_DLL_EXTERN int mecab_cost_train(int argc, char **argv);
00192 MECAB_DLL_EXTERN int mecab_system_eval(int argc, char **argv);
00193 MECAB_DLL_EXTERN int mecab_test_gen(int argc, char **argv);
00194 #endif
00195
00196 #ifdef __cplusplus
00197 }
00198 #endif
00199
00200
00201 #ifdef __cplusplus
00202
00203 namespace MeCab {
00204 typedef struct mecab_dictionary_info_t DictionaryInfo;
00205 typedef struct mecab_path_t Path;
00206 typedef struct mecab_node_t Node;
00207 typedef struct mecab_learner_path_t LearnerPath;
00208 typedef struct mecab_learner_node_t LearnerNode;
00209 typedef struct mecab_token_t Token;
00210
00211 class Tagger {
00212 public:
00213
00214 #ifndef SWIG
00215 virtual const char* parse(const char *str, size_t len, char *ostr, size_t olen) = 0;
00216 virtual const char* parse(const char *str, size_t len) = 0;
00217 virtual const Node* parseToNode(const char *str, size_t len) = 0;
00218 virtual const char* parseNBest(size_t N, const char *str, size_t len) = 0;
00219 virtual bool parseNBestInit(const char *str, size_t len) = 0;
00220 #endif
00221
00222 virtual const char* parse(const char *str) = 0;
00223 virtual const Node* parseToNode(const char *str) = 0;
00224 virtual const char* parseNBest(size_t N, const char *str) = 0;
00225 virtual bool parseNBestInit(const char *str) = 0;
00226 virtual const Node* nextNode() = 0;
00227 virtual const char* next() = 0;
00228 virtual const char* formatNode(const Node *node) = 0;
00229
00230
00231 virtual bool partial() const = 0;
00232 virtual void set_partial(bool partial) = 0;
00233 virtual float theta() const = 0;
00234 virtual void set_theta(float theta) = 0;
00235 virtual int lattice_level() const = 0;
00236 virtual void set_lattice_level(int level) = 0;
00237 virtual bool all_morphs() const = 0;
00238 virtual void set_all_morphs(bool all_morphs) = 0;
00239
00240 #ifndef SWIG
00241 virtual const char* next(char *ostr , size_t olen) = 0;
00242 virtual const char* parseNBest(size_t N, const char *str,
00243 size_t len, char *ostr, size_t olen) = 0;
00244 virtual const char* formatNode(const Node *node, char *ostr, size_t olen) = 0;
00245 #endif
00246
00247 virtual const DictionaryInfo* dictionary_info() const = 0;
00248
00249 virtual const char* what() = 0;
00250
00251 virtual ~Tagger() {}
00252
00253 #ifndef SIWG
00254 static Tagger* create(int argc, char **argv);
00255 static Tagger* create(const char *arg);
00256 #endif
00257
00258 static const char *version();
00259 };
00260
00261
00262 MECAB_DLL_EXTERN Tagger *createTagger(int argc, char **argv);
00263 MECAB_DLL_EXTERN Tagger *createTagger(const char *arg);
00264 MECAB_DLL_EXTERN const char* getTaggerError();
00265 }
00266
00267 #endif
00268 #endif