SHOGUN  v1.1.0
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
StratifiedCrossValidationSplitting.cpp
Go to the documentation of this file.
1 /*
2  * This program is free software; you can redistribute it and/or modify
3  * it under the terms of the GNU General Public License as published by
4  * the Free Software Foundation; either version 3 of the License, or
5  * (at your option) any later version.
6  *
7  * Written (W) 2011 Heiko Strathmann
8  * Copyright (C) 2011 Berlin Institute of Technology and Max-Planck-Society
9  */
10 
12 #include <shogun/features/Labels.h>
13 #include <shogun/lib/Set.h>
14 
15 using namespace shogun;
16 
18  CSplittingStrategy(0, 0)
19 {
 /* Empty body: the base class is initialised with 0 for both of its
  * arguments and, unlike the (labels, num_subsets) constructor, no
  * subsets are built here. */
20 }
21 
23  CLabels* labels, index_t num_subsets) :
24  CSplittingStrategy(labels, num_subsets)
25 {
 /* labels and num_subsets are forwarded unchanged to CSplittingStrategy;
  * the subsets are then built right away, as part of construction. */
26  build_subsets();
27 }
28 
30 {
 /* Overview: groups the indices of m_labels by label value, shuffles each
  * group, and then hands the indices out one at a time to the arrays held
  * in m_subset_indices in rotating order, so every label value is spread
  * across the subsets. */
31  /* extract all labels */
32  CSet<float64_t> unique_labels;
33  for (index_t i=0; i<m_labels->get_num_labels(); ++i)
34  unique_labels.add(m_labels->get_label(i));
35 
36  /* for every label, build set for indices */
 /* NOTE(review): the declaration of label_indices is not visible in this
  * listing (listing line 37 is absent) -- presumably a container of
  * CDynamicArray<index_t> objects; confirm against the original file. */
38  for (index_t i=0; i<unique_labels.get_num_elements(); ++i)
39  label_indices.append_element(new CDynamicArray<index_t> ());
40 
41  /* fill set with indices, for each label type ... */
42  for (index_t i=0; i<unique_labels.get_num_elements(); ++i)
43  {
44  /* ... iterate over all labels and add indices with same label to set */
 /* every label is rescanned once per distinct label value */
45  for (index_t j=0; j<m_labels->get_num_labels(); ++j)
46  {
 /* exact == comparison on float64_t label values */
47  if (m_labels->get_label(j)==unique_labels[i])
48  {
49  CDynamicArray<index_t>* current=label_indices.get_element(i);
50  current->append_element(j);
 /* presumably get_element() hands back a counted reference that has
  * to be released again -- TODO confirm */
51  SG_UNREF(current);
52  }
53  }
54  }
55 
56  /* shuffle created label sets */
57  for (index_t i=0; i<label_indices.get_num_elements(); ++i)
58  {
59  CDynamicArray<index_t>* current=label_indices.get_element(i);
60  current->shuffle();
61  SG_UNREF(current);
62  }
63 
64  /* distribute labels to subsets for all label types */
 /* target_set is declared outside the label loop, so the rotation carries
  * on across label types instead of restarting at subset 0 */
65  index_t target_set=0;
66  for (index_t i=0; i<unique_labels.get_num_elements(); ++i)
67  {
68  /* current index set for current label */
69  CDynamicArray<index_t>* current=label_indices.get_element(i);
70 
71  for (index_t j=0; j<current->get_num_elements(); ++j)
72  {
73  CDynamicArray<index_t>* next=m_subset_indices->get_element(
74  target_set++);
75  next->append_element(current->get_element(j));
 /* wrap around after the last subset.
  * NOTE(review): assumes m_subset_indices already holds at least one
  * array; with zero elements this would be a modulo by zero -- confirm
  * where it is populated. */
76  target_set%=m_subset_indices->get_num_elements();
77  SG_UNREF(next);
78  }
79 
80  SG_UNREF(current);
81  }
82 
83  /* finally shuffle to avoid that subsets with low indices have more
84  * elements, which happens if the number of class labels is not equal to
85  * the number of subsets */
86  m_subset_indices->shuffle();
87 }
T get_element(int32_t index) const
Definition: DynamicArray.h:88
T * get_element(int32_t index) const
The class Labels models labels, i.e. class assignments of objects.
Definition: Labels.h:35
Abstract base class for all splitting types. Takes a CLabels instance and generates a desired number ...
int32_t get_num_elements() const
Definition: DynamicArray.h:76
int32_t get_num_labels()
Definition: Labels.cpp:240
CDynamicObjectArray< CDynamicArray< index_t > > * m_subset_indices
bool append_element(T element)
Definition: DynamicArray.h:132
Template Set class.
Definition: Set.h:26
float64_t get_label(int32_t idx)
Definition: Labels.cpp:223
Template Dynamic array class that creates an array that can be used like a list or an array...
Definition: DynArray.h:21
#define SG_UNREF(x)
Definition: SGObject.h:45
void add(T e)
Definition: Set.h:45
int32_t get_num_elements() const
Definition: Set.h:86
int32_t index_t
Definition: DataType.h:25

SHOGUN Machine Learning Toolbox - Documentation