1
1
import emel/lazy/math/statistics as stats
2
2
import emel/utils/result as ut_res
3
3
import emel/utils/zlist as ut_zlist
4
+ import gleam/dict . { type Dict }
4
5
import gleam/int
5
- import gleam/map . { Map }
6
6
import gleam/pair
7
- import gleam_zlists . { ZList } as zlist
7
+ import gleam_zlists . { type ZList } as zlist
8
8
9
9
pub fn entropy_with_size (
10
- dataset : ZList ( Map ( String , String ) ) ,
10
+ dataset : ZList ( Dict ( String , String ) ) ,
11
11
class_attr : String ,
12
12
) -> # ( Float , Int ) {
13
13
let # ( freqs , size ) =
@@ -23,7 +23,7 @@ pub fn entropy_with_size(
23
23
}
24
24
25
25
pub fn feature_entropy (
26
- dataset : ZList ( Map ( String , String ) ) ,
26
+ dataset : ZList ( Dict ( String , String ) ) ,
27
27
class_attr : String ,
28
28
feature : String ,
29
29
dataset_size : Int ,
@@ -40,23 +40,23 @@ pub fn feature_entropy(
40
40
}
41
41
42
42
/// Builds a one-element ZList holding `rule` extended with a class entry.
///
/// Takes the head of `sub_dataset`, unwraps it with `ut_res.unsafe_res`,
/// reads that row's `class_attr` value with `ut_res.unsafe_get`, inserts the
/// value into `rule` under the key `class_attr`, and wraps the resulting
/// dictionary with `zlist.singleton`.
///
/// NOTE(review): only the first row is inspected; the name suggests the
/// caller guarantees every row shares one class value - confirm at the call
/// site. The failure behaviour of the `unsafe_*` helpers on an empty
/// `sub_dataset` or a missing key is defined in `emel/utils/result`, which is
/// not visible here.
///
/// NOTE(review): this span is rendered as a diff - lines marked `-` are the
/// pre-change `Map`/`map` form and lines marked `+` are the post-change
/// `Dict`/`dict` form of the same statements.
fn same_class (
43
- rule : Map ( String , String ) ,
43
+ rule : Dict ( String , String ) ,
44
44
class_attr : String ,
45
- sub_dataset : ZList ( Map ( String , String ) ) ,
46
- ) -> ZList ( Map ( String , String ) ) {
45
+ sub_dataset : ZList ( Dict ( String , String ) ) ,
46
+ ) -> ZList ( Dict ( String , String ) ) {
47
47
sub_dataset
48
48
|> zlist . head
49
49
|> ut_res . unsafe_res
50
50
|> ut_res . unsafe_get ( class_attr )
51
- |> map . insert ( rule , class_attr , _)
51
+ |> dict . insert ( rule , class_attr , _)
52
52
|> zlist . singleton
53
53
}
54
54
55
55
fn exhausted_attributes (
56
- rule : Map ( String , String ) ,
56
+ rule : Dict ( String , String ) ,
57
57
class_attr : String ,
58
- grouped_by_class : ZList ( # ( String , ZList ( Map ( String , String ) ) ) ) ,
59
- ) -> ZList ( Map ( String , String ) ) {
58
+ grouped_by_class : ZList ( # ( String , ZList ( Dict ( String , String ) ) ) ) ,
59
+ ) -> ZList ( Dict ( String , String ) ) {
60
60
grouped_by_class
61
61
|> ut_zlist . max_by ( fn ( t ) {
62
62
t
@@ -66,16 +66,16 @@ fn exhausted_attributes(
66
66
} )
67
67
|> ut_res . unsafe_res
68
68
|> pair . first
69
- |> map . insert ( rule , class_attr , _)
69
+ |> dict . insert ( rule , class_attr , _)
70
70
|> zlist . singleton
71
71
}
72
72
73
73
fn unfold_rule (
74
- rule : Map ( String , String ) ,
74
+ rule : Dict ( String , String ) ,
75
75
non_selected_attrs : ZList ( String ) ,
76
76
class_attr : String ,
77
- sub_dataset : ZList ( Map ( String , String ) ) ,
78
- ) -> ZList ( Map ( String , String ) ) {
77
+ sub_dataset : ZList ( Dict ( String , String ) ) ,
78
+ ) -> ZList ( Dict ( String , String ) ) {
79
79
let grouped_by_class =
80
80
ut_zlist . group_by ( sub_dataset , ut_res . unsafe_get ( _, class_attr ) )
81
81
case zlist . count ( grouped_by_class ) {
@@ -89,24 +89,19 @@ fn unfold_rule(
89
89
let next_selected_attr =
90
90
non_selected_attrs
91
91
|> ut_zlist . max_by ( fn ( feature ) {
92
- entropy -. feature_entropy (
93
- sub_dataset ,
94
- class_attr ,
95
- feature ,
96
- dataset_size ,
97
- )
92
+ entropy
93
+ -. feature_entropy ( sub_dataset , class_attr , feature , dataset_size )
98
94
} )
99
95
|> ut_res . unsafe_res
100
96
let next_non_selected_attrs =
101
- zlist . filter (
102
- non_selected_attrs ,
103
- fn ( attr ) { attr != next_selected_attr } ,
104
- )
97
+ zlist . filter ( non_selected_attrs , fn ( attr ) {
98
+ attr != next_selected_attr
99
+ } )
105
100
sub_dataset
106
101
|> ut_zlist . group_by ( ut_res . unsafe_get ( _, next_selected_attr ) )
107
102
|> zlist . flat_map ( fn ( t ) {
108
103
let # ( feature_val , sub_group ) = t
109
- let next_rule = map . insert ( rule , next_selected_attr , feature_val )
104
+ let next_rule = dict . insert ( rule , next_selected_attr , feature_val )
110
105
unfold_rule (
111
106
next_rule ,
112
107
next_non_selected_attrs ,
@@ -120,32 +115,29 @@ fn unfold_rule(
120
115
}
121
116
122
117
/// Public entry point that produces the list of rules for `dataset`.
///
/// Delegates to `unfold_rule`, seeding it with an empty dictionary as the
/// starting rule and passing `attributes`, `class` and `dataset` through
/// unchanged. The result is whatever `unfold_rule` returns: a ZList of
/// String-to-String dictionaries.
///
/// NOTE(review): this span is rendered as a diff - lines marked `-` are the
/// pre-change `Map`/`map.new` form and lines marked `+` are the post-change
/// `Dict`/`dict.new` form of the same statements.
pub fn decision_tree (
123
- dataset : ZList ( Map ( String , String ) ) ,
118
+ dataset : ZList ( Dict ( String , String ) ) ,
124
119
attributes : ZList ( String ) ,
125
120
class : String ,
126
- ) -> ZList ( Map ( String , String ) ) {
127
- unfold_rule ( map . new ( ) , attributes , class , dataset )
121
+ ) -> ZList ( Dict ( String , String ) ) {
122
+ unfold_rule ( dict . new ( ) , attributes , class , dataset )
128
123
}
129
124
130
125
pub fn classifier (
131
- dataset : ZList ( Map ( String , String ) ) ,
126
+ dataset : ZList ( Dict ( String , String ) ) ,
132
127
discrete_attributes : ZList ( String ) ,
133
128
class : String ,
134
- ) -> fn ( Map ( String , String ) ) -> String {
135
- let all_rules : ZList ( Map ( String , String ) ) =
129
+ ) -> fn ( Dict ( String , String ) ) -> String {
130
+ let all_rules : ZList ( Dict ( String , String ) ) =
136
131
decision_tree ( dataset , discrete_attributes , class )
137
132
fn ( item ) {
138
133
all_rules
139
134
|> zlist . find ( fn ( rule ) {
140
- zlist . all (
141
- discrete_attributes ,
142
- fn ( feature ) {
143
- case map . get ( rule , feature ) {
144
- Error ( Nil ) -> True
145
- Ok ( v ) -> v == ut_res . unsafe_get ( item , feature )
146
- }
147
- } ,
148
- )
135
+ zlist . all ( discrete_attributes , fn ( feature ) {
136
+ case dict . get ( rule , feature ) {
137
+ Error ( Nil ) -> True
138
+ Ok ( v ) -> v == ut_res . unsafe_get ( item , feature )
139
+ }
140
+ } )
149
141
} )
150
142
|> ut_res . unsafe_res
151
143
|> ut_res . unsafe_get ( class )
0 commit comments