@@ -57,8 +57,8 @@ module Classifiers
57
57
58
58
class NaiveBayes < Classifier
59
59
60
- parameters_info :m => " Default value is set to 0. It may be set to a value greater than " +
61
- " 0 when the size of the dataset is relatively small"
60
+ parameters_info :m => ' Default value is set to 0. It may be set to a value greater than ' +
61
+ ' 0 when the size of the dataset is relatively small'
62
62
63
63
def initialize
64
64
@m = 0
@@ -75,7 +75,7 @@ def initialize
75
75
# b.eval(["Red", "SUV", "Domestic"])
76
76
# => 'No'
77
77
def eval(data)
  # Start from a copy of the stored class probabilities: the helper below
  # multiplies into the array it is given, and @class_prob must stay intact.
  prob = calculate_class_probabilities_for_entry(data, @class_prob.dup)

  # The position of the largest score identifies the predicted class.
  index_to_klass(prob.index(prob.max))
end
@@ -90,27 +90,28 @@ def eval(data)
90
90
# b.get_probability_map(["Red", "SUV", "Domestic"])
91
91
# => {"Yes"=>0.4166666666666667, "No"=>0.5833333333333334}
92
92
def get_probability_map(data)
  # Work on a copy so the stored class probabilities are not modified.
  prob = calculate_class_probabilities_for_entry(data, @class_prob.dup)
  prob = normalize_class_probability(prob)

  # Build { class name => probability }, preserving class index order.
  prob.each_with_index.each_with_object({}) do |(value, i), probability_map|
    probability_map[index_to_klass(i)] = value
  end
end
100
101
101
102
# counts values of the attribute instances and calculates the probability of the classes
102
103
# and the conditional probabilities
103
104
# Parameter data has to be an instance of Ai4r::Data::DataSet
104
105
def build(data)
  # Both guards raise a RuntimeError (plain `raise` with a message string).
  raise ' Error instance must be passed' unless data.is_a?(Ai4r::Data::DataSet)
  raise ' Data should not be empty' if data.data_items.empty?

  # Order matters: the later steps read state set up by the earlier ones.
  initialize_domain_data(data)
  initialize_klass_index
  initialize_pc
  calculate_probabilities

  # Return the classifier itself so calls can be chained.
  self
end
115
116
116
117
private
@@ -128,7 +129,7 @@ def initialize_domain_data(data)
128
129
# probability of every attribute in condition to a specific class
129
130
# this is repeated for every class
130
131
def calculate_class_probabilities_for_entry ( data , prob )
131
- prob . each_with_index do |prob_entry , prob_index |
132
+ 0 . upto ( prob . length - 1 ) do |prob_index |
132
133
data . each_with_index do |att , index |
133
134
next if value_index ( att , index ) . nil?
134
135
prob [ prob_index ] *= @pcp [ index ] [ value_index ( att , index ) ] [ prob_index ]
@@ -140,13 +141,13 @@ def calculate_class_probabilities_for_entry(data, prob)
140
141
# Scales the entries of prob so that they add up to 1.
# When the total is not positive the input array is returned unchanged,
# which avoids dividing by zero.
def normalize_class_probability(prob)
  prob_sum = sum(prob)
  return prob unless prob_sum > 0

  prob.map { |prob_entry| prob_entry / prob_sum }
end
146
147
147
148
# sums an array up; returns a number of type Float
148
149
def sum(array)
  # Seeding with 0.0 makes the result a Float even for integer input
  # and yields 0.0 for an empty array.
  array.inject(0.0, :+)
end
151
152
152
153
# returns the name of the class when the index is found
@@ -160,7 +161,7 @@ def initialize_klass_index
160
161
@klass_index [ dl ] = index
161
162
end
162
163
163
- @data_labels . each_with_index do |dl , index |
164
+ 0 . upto ( @data_labels . length - 1 ) do |index |
164
165
@values [ index ] = { }
165
166
@domains [ index ] . each_with_index do |d , d_index |
166
167
@values [ index ] [ d ] = d_index
@@ -180,27 +181,27 @@ def value_index(value, dl_index)
180
181
181
182
# builds an array of the form:
182
183
# array[attributes][values][classes]
183
def build_array(index)
  # One row per value in the attribute's domain, one zero-initialised
  # slot per class. The block form creates a separate inner array per row.
  Array.new(@domains[index].length) { Array.new(@klasses.length, 0) }
end
189
190
190
191
# initializes the two arrays for storing the count and conditional probabilities of
191
192
# the attributes
192
193
def initialize_pc
  # For every attribute position, append one fresh table to the counts
  # (@pcc) and one to the conditional probabilities (@pcp).
  @data_labels.each_index do |index|
    @pcc << build_array(index)
    @pcp << build_array(index)
  end
end
198
199
199
200
# calculates the occurrences of a class and the instances of a certain value of a
200
201
# certain attribute and the assigned class.
201
202
# In addition to that, it also calculates the conditional probabilities and values
202
203
def calculate_probabilities
203
- @klasses . each { |dl | @class_counts [ klass_index ( dl ) ] = 0 }
204
+ @klasses . each { |dl | @class_counts [ klass_index ( dl ) ] = 0 }
204
205
205
206
calculate_class_probabilities
206
207
count_instances
@@ -220,7 +221,7 @@ def calculate_class_probabilities
220
221
# counts the instances of a certain value of a certain attribute and the assigned class
221
222
def count_instances
222
223
@data_items . each do |item |
223
- @data_labels . each_with_index do |dl , dl_index |
224
+ 0 . upto ( @data_labels . length - 1 ) do |dl_index |
224
225
@pcc [ dl_index ] [ value_index ( item [ dl_index ] , dl_index ) ] [ klass_index ( item . klass ) ] += 1
225
226
end
226
227
end
@@ -231,7 +232,7 @@ def calculate_conditional_probabilities
231
232
@pcc . each_with_index do |attributes , a_index |
232
233
attributes . each_with_index do |values , v_index |
233
234
values . each_with_index do |klass , k_index |
234
- @pcp [ a_index ] [ v_index ] [ k_index ] = ( klass . to_f + @m * @class_prob [ k_index ] ) / ( @class_counts [ k_index ] + @m ) . to_f
235
+ @pcp [ a_index ] [ v_index ] [ k_index ] = ( klass . to_f + @m * @class_prob [ k_index ] ) / ( @class_counts [ k_index ] + @m )
235
236
end
236
237
end
237
238
end
0 commit comments