// HashedWDFeaturesTransposed.cpp (excerpts)

#ifndef DOXYGEN_SHOULD_SKIP_THIS
struct HASHEDWD_THREAD_PARAM   // parameters handed to the parallel dot-product helpers
...
#endif // DOXYGEN_SHOULD_SKIP_THIS

// default constructor, flagged SG_UNSTABLE("CHashedWDFeaturesTransposed::CHashedWDFeaturesTransposed()", ...)

// main constructor(str, start_order, order, from_order, ...): transposes the string features up front
int32_t transposed_num_feat=0;
int32_t transposed_num_vec=0;
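For orientation, a minimal sketch of what the transposed storage amounts to, independent of Shogun's API: instead of num_vec strings of length len, keep len columns of num_vec symbols, so that symbol i of every vector lies contiguously in memory. The function name and container types below are made up for illustration.

// Sketch only: illustrates the transposed layout the later helpers rely on.
// transpose_strings and the std::vector layout are hypothetical, not Shogun API.
#include <cstdint>
#include <vector>

// strings[v][i] = symbol i of vector v  ->  transposed[i][v] = symbol i of vector v
std::vector<std::vector<uint8_t>> transpose_strings(
    const std::vector<std::vector<uint8_t>>& strings)
{
    const size_t num_vec = strings.size();
    const size_t len = num_vec ? strings[0].size() : 0;   // assumes equal-length strings
    std::vector<std::vector<uint8_t>> transposed(len, std::vector<uint8_t>(num_vec));
    for (size_t v = 0; v < num_vec; ++v)
        for (size_t i = 0; i < len; ++i)
            transposed[i][v] = strings[v][i];
    return transposed;
}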
// CHashedWDFeaturesTransposed::dot(): weighted-degree dot product between two raw strings
bool free_vec1, free_vec2;
uint8_t* vec2=wdf->strings->get_feature_vector(vec_idx2, len2, free_vec2);
for (int32_t i=0; i<len1; i++)
{
    for (int32_t j=0; (i+j<len1) && (j<degree); j++)
    {
        if (vec1[i+j]!=vec2[i+j])
            break; // the matching k-mer starting at i ends here
        ...
    }
}
wdf->strings->free_feature_vector(vec2, vec_idx2, free_vec2);
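To make the excerpt self-contained, here is a hedged sketch of the computation dot() performs: position i contributes wd_weights[j] squared for every order j whose k-mer still matches, and the inner loop stops at the first mismatch. The function name, containers and the squared normalisation at the end are illustrative assumptions, not Shogun signatures.

// Sketch of the weighted-degree dot product between two equal-length strings.
#include <cstdint>
#include <vector>

double wd_dot(const std::vector<uint8_t>& a, const std::vector<uint8_t>& b,
              const std::vector<double>& wd_weights, double normalization_const)
{
    const int len = static_cast<int>(a.size());
    const int degree = static_cast<int>(wd_weights.size());
    double sum = 0.0;
    for (int i = 0; i < len; ++i)
    {
        for (int j = 0; j < degree && i + j < len; ++j)
        {
            if (a[i + j] != b[i + j])
                break;                              // prefix match ends here
            sum += wd_weights[j] * wd_weights[j];   // one weight factor per side of the dot
        }
    }
    return sum / (normalization_const * normalization_const);
}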
// CHashedWDFeaturesTransposed::dense_dot(): dot product of phi(x) with a dense weight vector vec2
if (vec2_len != w_dim)
    SG_ERROR("Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len, w_dim)

uint32_t* val=SG_MALLOC(uint32_t, len); // running hash state, one entry per string position

for (int32_t i=0; i < len; i++)
{
    for (int32_t k=0; k<degree && i+k<len; k++)
    {
        ...
#ifdef DEBUG_HASHEDWD
        SG_PRINT("vec[i]=%d, k=%d, offs=%d o=%d h=%d \n", vec[i], k, offs, o, h)
#endif
        sum+=vec2[o+(h & mask)]*wd; // h indexes the 2^hash_bits block reserved for (i, k)
    }
}
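The hashed variant never builds the weighted-degree feature space explicitly: each k-mer starting at position i is hashed, the hash is masked down to hash_bits bits, and the result indexes a block of partial_w_dim = 2^hash_bits weights reserved for that (position, order) pair at offset o. Below is a compact sketch of that layout, with std::hash standing in for the incremental MurmurHash3 used above; the names and the exact weight-vector layout are assumptions for illustration.

// Sketch of dense_dot over hashed WD features: w is laid out as len*degree
// blocks of size partial_w_dim, one block per (position, order) pair.
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

double hashed_wd_dense_dot(const std::vector<uint8_t>& x, const std::vector<double>& w,
                           const std::vector<double>& wd_weights, int hash_bits)
{
    const int len = static_cast<int>(x.size());
    const int degree = static_cast<int>(wd_weights.size());
    const uint32_t partial_w_dim = 1u << hash_bits;
    const uint32_t mask = partial_w_dim - 1;
    // assumed layout: w holds len * degree * partial_w_dim entries
    double sum = 0.0;
    uint32_t offs = 0;
    for (int i = 0; i < len; ++i)
    {
        uint32_t o = offs;
        std::string kmer;                           // grows into the k-mer x[i..i+k]
        for (int k = 0; k < degree && i + k < len; ++k)
        {
            kmer.push_back(static_cast<char>(x[i + k]));
            const uint32_t h = static_cast<uint32_t>(std::hash<std::string>{}(kmer));
            sum += w[o + (h & mask)] * wd_weights[k];
            o += partial_w_dim;                     // next order gets its own block
        }
        offs += partial_w_dim * degree;             // next position gets degree blocks
    }
    return sum;
}

A design note: with this blocking, hash collisions are confined to a single (position, order) block of 2^hash_bits entries, and the size of the primal vector does not depend on the alphabet size.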
// CHashedWDFeaturesTransposed::dense_dot_range(): score vectors j in [start, stop)
uint32_t* index=SG_MALLOC(uint32_t, stop); // per-vector running hash state shared with the helper

int32_t num_vectors=stop-start;
int32_t num_threads=1;

if (dim != w_dim)
    SG_ERROR("Dimensions don't match, vec_len=%d, w_dim=%d\n", dim, w_dim)

// single-threaded path: one parameter block, helper called directly
HASHEDWD_THREAD_PARAM params;
params.sub_index=NULL;
params.output=output;
params.alphas=alphas;
params.progress=false;

// multi-threaded path: split the range into num_threads contiguous chunks
pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
HASHEDWD_THREAD_PARAM* params = SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
int32_t step=num_vectors/num_threads;

for (t=0; t<num_threads-1; t++)
{
    params[t].sub_index=NULL;
    params[t].output = output;
    params[t].start = start+t*step;
    params[t].stop = start+(t+1)*step;
    params[t].alphas=alphas;
    params[t].progress = false;
    params[t].index=index;
    pthread_create(&threads[t], NULL,
        CHashedWDFeaturesTransposed::dense_dot_range_helper, (void*) &params[t]);
}

// the last chunk runs in the calling thread
params[t].sub_index=NULL;
params[t].output = output;
params[t].start = start+t*step;
params[t].stop = stop;
params[t].alphas=alphas;
params[t].progress = false;
params[t].index=index;

for (t=0; t<num_threads-1; t++)
    pthread_join(threads[t], NULL);

if (CSignal::cancel_computations())
    SG_INFO("prematurely stopped. \n")
// CHashedWDFeaturesTransposed::dense_dot_range_subset(): same scheme, but over an
// explicit list of vector indices sub_index[0..num-1]
uint32_t* index=SG_MALLOC(uint32_t, num);

int32_t num_threads=1;

if (dim != w_dim)
    SG_ERROR("Dimensions don't match, vec_len=%d, w_dim=%d\n", dim, w_dim)

// single-threaded path
HASHEDWD_THREAD_PARAM params;
params.sub_index=sub_index;
params.output=output;
params.alphas=alphas;
params.progress=false;

// multi-threaded path: chunk the index list instead of the vector range
pthread_t* threads = SG_MALLOC(pthread_t, num_threads-1);
HASHEDWD_THREAD_PARAM* params = SG_MALLOC(HASHEDWD_THREAD_PARAM, num_threads);
int32_t step=num/num_threads;

for (t=0; t<num_threads-1; t++)
{
    params[t].sub_index=sub_index;
    params[t].output = output;
    params[t].start = t*step;
    params[t].stop = (t+1)*step;
    params[t].alphas=alphas;
    params[t].progress = false;
    params[t].index=index;
    pthread_create(&threads[t], NULL,
        CHashedWDFeaturesTransposed::dense_dot_range_helper, (void*) &params[t]);
}

// last chunk in the calling thread
params[t].sub_index=sub_index;
params[t].output = output;
params[t].start = t*step;
params[t].stop = num;
params[t].alphas=alphas;
params[t].progress = false;
params[t].index=index;

for (t=0; t<num_threads-1; t++)
    pthread_join(threads[t], NULL);

if (CSignal::cancel_computations())
    SG_INFO("prematurely stopped. \n")
// CHashedWDFeaturesTransposed::dense_dot_range_helper(): body run by each worker thread
HASHEDWD_THREAD_PARAM* par=(HASHEDWD_THREAD_PARAM*) p;
int32_t* sub_index=par->sub_index;
int32_t start=par->start;
int32_t stop=par->stop;
bool progress=par->progress;
uint32_t* index=par->index;

if (sub_index) // score only the vectors listed in sub_index
{
    for (int32_t j=start; j<stop; j++)
        output[j]=0.0;

    // position-major traversal: fix position i and order k, then sweep all vectors j,
    // so transposed_strings[i+k] is read as one contiguous column
    uint8_t* dim=transposed_strings[i+k].string;
    ...
    for (int32_t j=start; j<stop; j++)
    {
        uint8_t bval=dim[sub_index[j]];
        if (k==0)
            index[j] = 0xDEADBEAF; // reset the running hash at the start of each k-mer
        ...
        uint32_t h=CHash::FinalizeIncrementalMurmurHash3(index[j], carry, chunk);
        output[j]+=vec[o + (h & mask)]*wd;
    }
    index[stop-1]=CHash::FinalizeIncrementalMurmurHash3(index[stop-1], carry, chunk);
    ...
    offs+=partial_w_dim*degree;

    for (int32_t j=start; j<stop; j++)
    {
        if (alphas)
            output[j]=output[j]*alphas[sub_index[j]]/normalization_const+bias;
        else
            output[j]=output[j]/normalization_const+bias;
    }
}
else // score the plain range [start, stop), sub_index==NULL
{
    uint8_t* dim=transposed_strings[i+k].string;
    ...
    for (int32_t j=start; j<stop; j++)
    {
        uint8_t bval=dim[j];
        if (k==0)
            index[j] = 0xDEADBEAF;
        ...
        uint32_t h=CHash::FinalizeIncrementalMurmurHash3(index[j], carry, chunk);
        output[j]+=vec[o + (h & mask)]*wd;
    }
    index[stop-1]=CHash::FinalizeIncrementalMurmurHash3(index[stop-1], carry, chunk);
    offs+=partial_w_dim*degree;

    for (int32_t j=start; j<stop; j++)
    {
        if (alphas)
            output[j]=output[j]*alphas[j]/normalization_const+bias;
        else
            output[j]=output[j]/normalization_const+bias;
    }
}
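The loop order is what the transposed layout buys: fix a position i and an order k, take the contiguous column transposed_strings[i+k], and update the running hash and partial score of every vector j in the chunk before moving on, instead of hashing one vector at a time. Below is a sketch of that position-major traversal, again with std::hash in place of the incremental MurmurHash3 and with hypothetical names.

// Sketch of the position-major (transposed) traversal: outer loops over
// position i and order k, inner loop over vectors j, so each column is
// read sequentially for the whole chunk of vectors.
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

void dense_dot_chunk(const std::vector<std::vector<uint8_t>>& transposed, // [position][vector]
                     const std::vector<double>& w, const std::vector<double>& wd_weights,
                     int hash_bits, int start, int stop, std::vector<double>& output)
{
    // output[start..stop-1] must be zero-initialised by the caller
    const int len = static_cast<int>(transposed.size());
    const int degree = static_cast<int>(wd_weights.size());
    const uint32_t partial_w_dim = 1u << hash_bits;
    const uint32_t mask = partial_w_dim - 1;
    std::vector<std::string> kmer(stop);            // running k-mer per vector (cf. index[])

    uint32_t offs = 0;
    for (int i = 0; i < len; ++i)
    {
        uint32_t o = offs;
        for (int k = 0; k < degree && i + k < len; ++k)
        {
            const double wd = wd_weights[k];
            const std::vector<uint8_t>& column = transposed[i + k]; // symbol i+k of every vector
            for (int j = start; j < stop; ++j)
            {
                if (k == 0)
                    kmer[j].clear();                // restart the running hash state
                kmer[j].push_back(static_cast<char>(column[j]));
                const uint32_t h = static_cast<uint32_t>(std::hash<std::string>{}(kmer[j]));
                output[j] += w[o + (h & mask)] * wd;
            }
            o += partial_w_dim;
        }
        offs += partial_w_dim * degree;
    }
}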
// CHashedWDFeaturesTransposed::add_to_dense_vec(): accumulate alpha*phi(x) into vec2,
// using the same k-mer hashing as dense_dot()
if (vec2_len != w_dim)
    SG_ERROR("Dimensions don't match, vec2_dim=%d, w_dim=%d\n", vec2_len, w_dim)

uint32_t* val=SG_MALLOC(uint32_t, len); // running hash state per position

for (int32_t i=0; i<len; i++)
{
    for (int32_t k=0; k<degree && i+k<len; k++)
    {
        ...
#ifdef DEBUG_HASHEDWD
        SG_PRINT("offs=%d o=%d h=%d \n", offs, o, h)
        SG_PRINT("vec[i]=%d, k=%d, offs=%d o=%d\n", vec[i], k, offs, o)
#endif
        vec2[o+(h & mask)]+=wd; // scatter into the weight vector instead of reading from it
    }
}
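add_to_dense_vec() is the mirror image of dense_dot(): the same k-mer hashing picks the slot, but alpha-scaled degree weights are accumulated into the dense vector instead of being read from it, which is how a linear learner builds up its primal weight vector over this feature space. The sketch below uses the same illustrative layout and std::hash stand-in as the earlier dense-dot sketch.

// Sketch of add_to_dense_vec: scatter alpha-scaled WD weights into the hashed
// primal vector w, using the block layout assumed above.
#include <cstdint>
#include <functional>
#include <string>
#include <vector>

void hashed_wd_add_to_dense_vec(double alpha, const std::vector<uint8_t>& x,
                                std::vector<double>& w,
                                const std::vector<double>& wd_weights, int hash_bits)
{
    const int len = static_cast<int>(x.size());
    const int degree = static_cast<int>(wd_weights.size());
    const uint32_t partial_w_dim = 1u << hash_bits;
    const uint32_t mask = partial_w_dim - 1;
    uint32_t offs = 0;
    for (int i = 0; i < len; ++i)
    {
        uint32_t o = offs;
        std::string kmer;
        for (int k = 0; k < degree && i + k < len; ++k)
        {
            kmer.push_back(static_cast<char>(x[i + k]));
            const uint32_t h = static_cast<uint32_t>(std::hash<std::string>{}(kmer));
            w[o + (h & mask)] += alpha * wd_weights[k];   // scatter instead of gather
            o += partial_w_dim;
        }
        offs += partial_w_dim * degree;
    }
}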
// CHashedWDFeaturesTransposed::set_wd_weights(): one weight per matching order
for (int32_t i=0; i<degree; i++)
    ...
SG_DEBUG("created HashedWDFeaturesTransposed with d=%d (%d), alphabetsize=%d, "
    "dim=%d partial_dim=%d num=%d, len=%d\n", ...)

// CHashedWDFeaturesTransposed::set_normalization_const(): accumulate the norm over the orders
for (int32_t i=0; i<degree; i++)
    ...
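The two loops above fill wd_weights (one entry per order) and accumulate normalization_const from them. As a hedged sketch, this assumes the conventional weighted-degree kernel weighting and the usual length-dependent norm; the exact formulas used in this file are not visible in the excerpt.

// Sketch: standard WD-kernel degree weights and the induced normalization
// constant for strings of a fixed length. Formulas are the conventional
// weighted-degree ones and are an assumption here, not a quote of this file.
#include <cmath>
#include <vector>

std::vector<double> make_wd_weights(int degree, int from_degree)
{
    std::vector<double> wd_weights(degree);
    for (int i = 0; i < degree; ++i)
        wd_weights[i] = std::sqrt(2.0 * (from_degree - i) /
                                  (from_degree * (from_degree + 1.0)));
    return wd_weights;
}

double wd_normalization_const(const std::vector<double>& wd_weights, int string_length)
{
    // each order i contributes (string_length - i) positions to ||phi(x)||^2
    double norm_sq = 0.0;
    for (size_t i = 0; i < wd_weights.size(); ++i)
        norm_sq += (string_length - static_cast<int>(i)) * wd_weights[i] * wd_weights[i];
    return std::sqrt(norm_sq);
}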