SHOGUN  3.2.1
 全部  命名空间 文件 函数 变量 类型定义 枚举 枚举值 友元 宏定义  
MultilabelLabels.cpp
浏览该文件的文档.
1 /*
2  * Copyright (C) 2013 Zuse-Institute-Berlin (ZIB)
3  * Copyright (C) 2013-2014 Thoralf Klein
4  * Written (W) 2013-2014 Thoralf Klein
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions are met:
9  *
10  * 1. Redistributions of source code must retain the above copyright notice, this
11  * list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright notice,
13  * this list of conditions and the following disclaimer in the documentation
14  * and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
20  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
21  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
22  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
23  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26  *
27  * The views and conclusions contained in the software and documentation are those
28  * of the authors and should not be interpreted as representing official policies,
29  * either expressed or implied, of the Shogun Development Team.
30  */
31 
33 
34 using namespace shogun;
35 
37  : CLabels()
38 {
39  init(0, 1);
40 }
41 
42 
44  : CLabels()
45 {
46  init(0, num_classes);
47 }
48 
49 
50 CMultilabelLabels::CMultilabelLabels(int32_t num_labels, int16_t num_classes)
51  : CLabels()
52 {
53  init(num_labels, num_classes);
54 }
55 
56 
58 {
59  delete[] m_labels;
60 }
61 
62 
63 void
64 CMultilabelLabels::init(int32_t num_labels, int16_t num_classes)
65 {
66  REQUIRE(num_labels >= 0, "num_labels=%d should be >= 0", num_labels);
67  REQUIRE(num_classes > 0, "num_classes=%d should be > 0", num_classes);
68 
69  // This one does consider the contained labels, so its simply BROKEN
70  // Can be disabled as
71  SG_ADD(&m_num_labels, "m_num_labels", "number of labels", MS_NOT_AVAILABLE);
72  SG_ADD(&m_num_classes, "m_num_classes", "number of classes", MS_NOT_AVAILABLE);
73  // SG_ADD((CSGObject**) &m_labels, "m_labels", "The labels", MS_NOT_AVAILABLE);
74 
75 
76  // Can only be enabled after this issue has been solved:
77  // https://github.com/shogun-toolbox/shogun/issues/1972
78 /* this->m_parameters->add(&m_num_labels, "m_num_labels",
79  "Number of labels.");
80  this->m_parameters->add(&m_num_classes, "m_num_classes",
81  "Number of classes.");
82  this->m_parameters->add_vector(&m_labels, &m_num_labels, "labels_array",
83  "The label vectors for all (num_labels) outputs.");
84 */
85 
86  m_num_labels = num_labels;
87  m_num_classes = num_classes;
89 }
90 
91 
92 void
93 CMultilabelLabels::ensure_valid(const char * context)
94 {
95  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
96  {
97  if (sg_io->get_loglevel() == MSG_DEBUG && !m_labels[label_j].is_sorted())
98  {
99  SG_PRINT("m_labels[label_j=%d] not sorted: ", label_j);
100  m_labels[label_j].display_vector("");
101  }
102 
103  REQUIRE(m_labels[label_j].is_sorted(),
104  "labels[%d] are not sorted!", label_j);
105 
106  int32_t c_len = m_labels[label_j].vlen;
107  if (c_len <= 0)
108  {
109  continue;
110  }
111 
112  REQUIRE(m_labels[label_j].vector[0] >= 0,
113  "first label labels[%d]=%d should be >= 0!",
114  label_j, m_labels[label_j].vector[0]);
115  REQUIRE(m_labels[label_j].vector[c_len - 1] < get_num_classes(),
116  "last label labels[%d]=%d should be < num_classes == %d!",
117  label_j, m_labels[label_j].vector[0], get_num_classes());
118  }
119 }
120 
121 
122 int32_t
124 {
125  return m_num_labels;
126 }
127 
128 
129 int16_t
131 {
132  return m_num_classes;
133 }
134 
135 
136 void
138 {
139  for (int32_t label_j = 0; label_j < m_num_labels; label_j++)
140  {
141  m_labels[label_j] = labels[label_j];
142  }
143  ensure_valid("set_labels()");
144 }
145 
146 
148 {
149  SGVector <int32_t> ** labels_list =
150  SG_MALLOC(SGVector <int32_t> *, get_num_classes());
151  int32_t * num_label_idx =
152  SG_MALLOC(int32_t, get_num_classes());
153 
154  for (int16_t class_i = 0; class_i < get_num_classes(); class_i++)
155  {
156  num_label_idx[class_i] = 0;
157  }
158 
159  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
160  {
161  for (int32_t c_pos = 0; c_pos < m_labels[label_j].vlen; c_pos++)
162  {
163  int16_t class_i = m_labels[label_j][c_pos];
164  REQUIRE(class_i < get_num_classes(),
165  "class_i exceeded number of classes");
166  num_label_idx[class_i]++;
167  }
168  }
169 
170  for (int16_t class_i = 0; class_i < get_num_classes(); class_i++)
171  {
172  labels_list[class_i] =
173  new SGVector <int32_t> (num_label_idx[class_i]);
174  }
175  SG_FREE(num_label_idx);
176 
177  int32_t * next_label_idx = SG_MALLOC(int32_t, get_num_classes());
178  for (int16_t class_i = 0; class_i < get_num_classes(); class_i++)
179  {
180  next_label_idx[class_i] = 0;
181  }
182 
183  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
184  {
185  for (int32_t c_pos = 0; c_pos < m_labels[label_j].vlen; c_pos++)
186  {
187  // get class_i of current position
188  int16_t class_i = m_labels[label_j][c_pos];
189  REQUIRE(class_i < get_num_classes(),
190  "class_i exceeded number of classes");
191  // next free element in m_classes[class_i]:
192  int32_t l_pos = next_label_idx[class_i];
193  REQUIRE(l_pos < labels_list[class_i]->size(),
194  "l_pos exceeded length of label list");
195  next_label_idx[class_i]++;
196  // finally, story label_j into class-column
197  (*labels_list[class_i])[l_pos] = label_j;
198  }
199  }
200 
201  SG_FREE(next_label_idx);
202  return labels_list;
203 }
204 
205 
207 {
208  REQUIRE(j < get_num_labels(),
209  "label index j=%d should be within [%d,%d[",
210  j, 0, get_num_labels());
211  return m_labels[j];
212 }
213 
214 
215 template <class S, class D>
217 (SGVector <S> * sparse, int32_t dense_len, D d_true, D d_false)
218 {
219  SGVector <D> dense(dense_len);
220  dense.set_const(d_false);
221  for (int32_t i = 0; i < sparse->vlen; i++)
222  {
223  S index = (*sparse)[i];
224  REQUIRE(index < dense_len,
225  "class index exceeded length of dense vector");
226  dense[index] = d_true;
227  }
228  return dense;
229 }
230 
231 
232 template
233 SGVector <float64_t> CMultilabelLabels::to_dense <int16_t, float64_t>
234 (SGVector <int16_t> *, int32_t, float64_t, float64_t);
235 
236 template
237 SGVector <int32_t> CMultilabelLabels::to_dense <int16_t, int32_t>
238 (SGVector <int16_t> *, int32_t, int32_t, int32_t);
239 
240 template
241 SGVector <float64_t> CMultilabelLabels::to_dense <int32_t, float64_t>
242 (SGVector <int32_t> *, int32_t, float64_t, float64_t);
243 
244 void
246 {
247  REQUIRE(j < get_num_labels(),
248  "label index j=%d should be within [%d,%d[",
249  j, 0, get_num_labels());
250  m_labels[j] = label;
251 }
252 
253 
254 void
256 {
257  int16_t * num_class_idx = SG_MALLOC(int16_t , get_num_labels());
258  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
259  {
260  num_class_idx[label_j] = 0;
261  }
262 
263  for (int16_t class_i = 0; class_i < get_num_classes(); class_i++)
264  {
265  for (int32_t l_pos = 0; l_pos < labels_list[class_i]->vlen; l_pos++)
266  {
267  int32_t label_j = (*labels_list[class_i])[l_pos];
268  REQUIRE(label_j < get_num_labels(),
269  "class_i=%d/%d :: label_j=%d/%d (l_pos=%d)\n",
270  class_i, get_num_classes(), label_j, get_num_labels(),
271  l_pos);
272  num_class_idx[label_j]++;
273  }
274  }
275 
276  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
277  {
278  m_labels[label_j].resize_vector(num_class_idx[label_j]);
279  }
280  SG_FREE(num_class_idx);
281 
282  int16_t * next_class_idx = SG_MALLOC(int16_t , get_num_labels());
283  for (int32_t label_j = 0; label_j < get_num_labels(); label_j++)
284  {
285  next_class_idx[label_j] = 0;
286  }
287 
288  for (int16_t class_i = 0; class_i < get_num_classes(); class_i++)
289  {
290  for (int32_t l_pos = 0; l_pos < labels_list[class_i]->vlen; l_pos++)
291  {
292  // get class_i of current position
293  int32_t label_j = (*labels_list[class_i])[l_pos];
294  REQUIRE(label_j < get_num_labels(),
295  "class_i=%d/%d :: label_j=%d/%d (l_pos=%d)\n",
296  class_i, get_num_classes(), label_j, get_num_labels(),
297  l_pos);
298 
299  // next free element in m_labels[label_j]:
300  int32_t c_pos = next_class_idx[label_j];
301  REQUIRE(c_pos < m_labels[label_j].size(),
302  "c_pos exceeded length of labels vector");
303  next_class_idx[label_j]++;
304 
305  // finally, story label_j into class-column
306  m_labels[label_j][c_pos] = class_i;
307  }
308  }
309  SG_FREE(next_class_idx);
310 
311  return;
312 }
313 
314 
315 void
317 {
318  SGVector <int32_t> ** labels_list = get_class_labels();
319  SG_PRINT("printing %d binary label vectors for %d multilabels:\n",
321 
322  for (int32_t class_i = 0; class_i < get_num_classes(); class_i++)
323  {
324  SG_PRINT(" yC_{class_i=%d}", class_i);
325  SGVector <float64_t> dense =
326  to_dense <int32_t, float64_t> (labels_list[class_i],
327  get_num_labels(), +1, -1);
328  dense.display_vector("");
329  delete labels_list[class_i];
330  }
331  SG_FREE(labels_list);
332 
333  SG_PRINT("printing %d binary class vectors for %d labels:\n",
335 
336  for (int32_t j = 0; j < get_num_labels(); j++)
337  {
338  SG_PRINT(" y_{j=%d}", j);
339  SGVector <float64_t> dense =
340  to_dense <int16_t , float64_t> (&m_labels[j], get_num_classes(),
341  +1, -1);
342  dense.display_vector("");
343  }
344  return;
345 }
static SGVector< D > to_dense(SGVector< S > *sparse, int32_t dense_len, D d_true, D d_false)
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:35
SGVector< int16_t > * m_labels
virtual int32_t get_num_labels() const
void set_class_labels(SGVector< int32_t > **labels_list)
#define REQUIRE(x,...)
Definition: SGIO.h:208
void set_labels(SGVector< int16_t > *labels)
SGVector< int32_t > ** get_class_labels() const
#define SG_PRINT(...)
Definition: SGIO.h:139
SGIO * sg_io
Definition: init.cpp:28
SGVector< int16_t > get_label(int32_t j)
double float64_t
Definition: common.h:48
EMessageType get_loglevel() const
Definition: SGIO.cpp:285
void set_const(T const_elem)
Definition: SGVector.cpp:124
virtual int16_t get_num_classes() const
bool is_sorted() const
Definition: SGVector.cpp:215
void set_label(int32_t j, SGVector< int16_t > label)
void ensure_valid(const char *context=NULL)
void resize_vector(int32_t n)
Definition: SGVector.cpp:307
#define SG_ADD(...)
Definition: SGObject.h:71
void display_vector(const char *name="vector", const char *prefix="") const
Definition: SGVector.cpp:405
index_t vlen
Definition: SGVector.h:706

SHOGUN 机器学习工具包 - 项目文档