Skip to content

Commit 24d3f19

Browse files
dubovinszky authored and facebook-github-bot committed
HU Setup + Numeral
Summary: - Setup Hungarian (HU) language - Added Numeral Dimension Closes #79 Reviewed By: blandinw Differential Revision: D5595812 Pulled By: patapizza fbshipit-source-id: 5959938
1 parent 5d03b45 commit 24d3f19

File tree

13 files changed

+379
-0
lines changed

13 files changed

+379
-0
lines changed

Duckling/Dimensions.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import qualified Duckling.Dimensions.FR as FRDimensions
3232
import qualified Duckling.Dimensions.GA as GADimensions
3333
import qualified Duckling.Dimensions.HE as HEDimensions
3434
import qualified Duckling.Dimensions.HR as HRDimensions
35+
import qualified Duckling.Dimensions.HU as HUDimensions
3536
import qualified Duckling.Dimensions.ID as IDDimensions
3637
import qualified Duckling.Dimensions.IT as ITDimensions
3738
import qualified Duckling.Dimensions.JA as JADimensions
@@ -90,6 +91,7 @@ langDimensions FR = FRDimensions.allDimensions
9091
langDimensions GA = GADimensions.allDimensions
9192
langDimensions HE = HEDimensions.allDimensions
9293
langDimensions HR = HRDimensions.allDimensions
94+
langDimensions HU = HUDimensions.allDimensions
9395
langDimensions ID = IDDimensions.allDimensions
9496
langDimensions IT = ITDimensions.allDimensions
9597
langDimensions JA = JADimensions.allDimensions

Duckling/Dimensions/HU.hs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
-- Copyright (c) 2016-present, Facebook, Inc.
2+
-- All rights reserved.
3+
--
4+
-- This source code is licensed under the BSD-style license found in the
5+
-- LICENSE file in the root directory of this source tree. An additional grant
6+
-- of patent rights can be found in the PATENTS file in the same directory.
7+
8+
9+
module Duckling.Dimensions.HU
10+
( allDimensions
11+
) where
12+
13+
import Duckling.Dimensions.Types
14+
15+
-- | Dimensions enabled for Hungarian (HU); currently only 'Numeral'.
allDimensions :: [Some Dimension]
allDimensions = [This Numeral]

Duckling/Lang.hs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ data Lang
3434
| GA
3535
| HE
3636
| HR
37+
| HU
3738
| ID
3839
| IT
3940
| JA

Duckling/Numeral/HU/Corpus.hs

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,98 @@
1+
-- Copyright (c) 2016-present, Facebook, Inc.
2+
-- All rights reserved.
3+
--
4+
-- This source code is licensed under the BSD-style license found in the
5+
-- LICENSE file in the root directory of this source tree. An additional grant
6+
-- of patent rights can be found in the PATENTS file in the same directory.
7+
8+
9+
{-# LANGUAGE OverloadedStrings #-}
10+
11+
module Duckling.Numeral.HU.Corpus
12+
( corpus ) where
13+
14+
import Data.String
15+
import Prelude
16+
17+
import Duckling.Lang
18+
import Duckling.Numeral.Types
19+
import Duckling.Resolve
20+
import Duckling.Testing.Types
21+
22+
-- | Hungarian numeral corpus: the shared 'testContext' with its language
-- set to 'HU', paired with every example in 'allExamples'.
corpus :: Corpus
corpus = (hungarianContext, allExamples)
  where
    hungarianContext = testContext {lang = HU}
24+
25+
-- | Example phrases for Hungarian numerals, grouped by the value each
-- phrase is expected to resolve to.
--
-- Groups are listed in ascending numeric order and every value appears
-- exactly once, so a missing or duplicated case is easy to spot.
allExamples :: [Example]
allExamples = concat
  [ examples (NumeralValue 0)
      [ "0"
      , "nulla"
      , "zéró"
      ]
  , examples (NumeralValue 1)
      [ "1"
      , "egy"
      ]
  , examples (NumeralValue 2)
      [ "kettő"
      ]
  , examples (NumeralValue 3)
      [ "három"
      ]
  , examples (NumeralValue 4)
      [ "négy"
      ]
  , examples (NumeralValue 5)
      [ "öt"
      ]
  , examples (NumeralValue 6)
      [ "hat"
      ]
  , examples (NumeralValue 7)
      [ "hét"
      ]
  , examples (NumeralValue 8)
      [ "nyolc"
      ]
  , examples (NumeralValue 9)
      [ "kilenc"
      ]
  , examples (NumeralValue 10)
      [ "tíz"
      ]
  , examples (NumeralValue 11)
      [ "tizenegy"
      ]
  , examples (NumeralValue 15)
      [ "tizenöt"
      ]
  , examples (NumeralValue 17)
      [ "tizenhét"
      ]
  , examples (NumeralValue 20)
      [ "20"
      , "húsz"
      ]
  , examples (NumeralValue 22)
      [ "huszonkettő"
      ]
  , examples (NumeralValue 24)
      [ "24"
      , "huszonnégy"
      ]
  , examples (NumeralValue 26)
      [ "huszonhat"
      ]
  , examples (NumeralValue 28)
      [ "huszonnyolc"
      ]
  , examples (NumeralValue 34)
      [ "harmincnégy"
      ]
  , examples (NumeralValue 50)
      [ "ötven"
      ]
  ]

Duckling/Numeral/HU/Rules.hs

Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
-- Copyright (c) 2016-present, Facebook, Inc.
2+
-- All rights reserved.
3+
--
4+
-- This source code is licensed under the BSD-style license found in the
5+
-- LICENSE file in the root directory of this source tree. An additional grant
6+
-- of patent rights can be found in the PATENTS file in the same directory.
7+
8+
9+
{-# LANGUAGE GADTs #-}
10+
{-# LANGUAGE OverloadedStrings #-}
11+
{-# LANGUAGE NoRebindableSyntax #-}
12+
13+
module Duckling.Numeral.HU.Rules
14+
( rules ) where
15+
16+
import Data.HashMap.Strict (HashMap)
17+
import Data.Maybe
18+
import Data.String
19+
import Data.Text (Text)
20+
import Prelude
21+
import qualified Data.HashMap.Strict as HashMap
22+
import qualified Data.Text as Text
23+
24+
import Duckling.Dimensions.Types
25+
import Duckling.Numeral.Helpers
26+
import Duckling.Numeral.Types (NumeralData (..))
27+
import Duckling.Regex.Types
28+
import Duckling.Types
29+
import qualified Duckling.Numeral.Types as TNumeral
30+
31+
-- | Integers written with digits: a run of 1 to 18 decimal digits.
ruleIntegerNumeric :: Rule
ruleIntegerNumeric = Rule
  { name = "integer (numeric)"
  , pattern = [regex "(\\d{1,18})"]
  , prod = fromDigits
  }
  where
    -- The first capture group holds the digit string; it is converted
    -- with 'parseInt' and widened to 'Integer'. Any other token shape
    -- yields no result.
    fromDigits :: [Token] -> Maybe Token
    fromDigits (Token RegexMatch (GroupMatch (digits:_)):_) =
      parseInt digits >>= integer . toInteger
    fromDigits _ = Nothing
44+
45+
-- | Hungarian words for 0 through 10, keyed by their lowercase spelling.
-- Zero has two spellings ("nulla" and "zéró").
ruleNumeralMap :: HashMap Text Integer
ruleNumeralMap = HashMap.fromList $ ("nulla", 0) : zip wordsFromZero [0 ..]
  where
    -- Ordered so that each word's position is its value, starting at 0.
    wordsFromZero :: [Text]
    wordsFromZero =
      [ "z\x00E9r\x00F3"
      , "egy"
      , "kett\x0151"
      , "h\x00E1rom"
      , "n\x00E9gy"
      , "\x00F6t"
      , "hat"
      , "h\x00E9t"
      , "nyolc"
      , "kilenc"
      , "t\x00EDz"
      ]
60+
61+
-- | Spelled-out numbers from 0 to 10, resolved through 'ruleNumeralMap'.
ruleNumeral :: Rule
ruleNumeral = Rule
  { name = "number (0..10)"
  , pattern =
    [ regex "(nulla|z\x00E9r\x00F3|egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc|t\x00EDz)"
    ]
  , prod = fromWord
  }
  where
    -- The matched word is lowercased before the lookup because the map
    -- keys are all lowercase.
    fromWord :: [Token] -> Maybe Token
    fromWord (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) ruleNumeralMap
    fromWord _ = Nothing
72+
73+
-- | Hungarian words for 11 through 19 ("tizen" + unit), keyed by their
-- lowercase spelling.
elevenToNineteenMap :: HashMap Text Integer
elevenToNineteenMap = HashMap.fromList $ zip teenWords [11 ..]
  where
    -- Ordered so that consecutive words map to 11, 12, ..., 19.
    teenWords :: [Text]
    teenWords =
      [ "tizenegy"
      , "tizenkett\x0151"
      , "tizenh\x00E1rom"
      , "tizenn\x00E9gy"
      , "tizen\x00F6t"
      , "tizenhat"
      , "tizenh\x00E9t"
      , "tizennyolc"
      , "tizenkilenc"
      ]
85+
86+
-- | Spelled-out numbers from 11 to 19, resolved through
-- 'elevenToNineteenMap'.
ruleElevenToNineteen :: Rule
ruleElevenToNineteen = Rule
  { name = "number (11..19)"
  , pattern =
    [ regex "(tizenegy|tizenkett\x0151|tizenh\x00E1rom|tizenn\x00E9gy|tizen\x00F6t|tizenhat|tizenh\x00E9t|tizennyolc|tizenkilenc)"
    ]
  , prod = \tokens -> case tokens of
      -- Lowercase the match first: the map keys are all lowercase.
      (Token RegexMatch (GroupMatch (word:_)):_) ->
        let key = Text.toLower word
        in integer =<< HashMap.lookup key elevenToNineteenMap
      _ -> Nothing
  }
97+
98+
-- | Hungarian words for 21 through 29 ("huszon" + unit), keyed by their
-- lowercase spelling.
twentyoneToTwentynineMap :: HashMap Text Integer
twentyoneToTwentynineMap = HashMap.fromList $ zip twentiesWords [21 ..]
  where
    -- Ordered so that consecutive words map to 21, 22, ..., 29.
    twentiesWords :: [Text]
    twentiesWords =
      [ "huszonegy"
      , "huszonkett\x0151"
      , "huszonh\x00E1rom"
      , "huszonn\x00E9gy"
      , "huszon\x00F6t"
      , "huszonhat"
      , "huszonh\x00E9t"
      , "huszonnyolc"
      , "huszonkilenc"
      ]
110+
111+
-- | Spelled-out numbers from 21 to 29, resolved through
-- 'twentyoneToTwentynineMap'.
ruleTwentyoneToTwentynine :: Rule
ruleTwentyoneToTwentynine = Rule
  { name = "number (21..29)"
  , pattern =
    [ regex "(huszonegy|huszonkett\x0151|huszonh\x00E1rom|huszonn\x00E9gy|huszon\x00F6t|huszonhat|huszonh\x00E9t|huszonnyolc|huszonkilenc)"
    ]
  , prod = fromWord
  }
  where
    -- The matched word is lowercased before the lookup because the map
    -- keys are all lowercase.
    fromWord :: [Token] -> Maybe Token
    fromWord (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) twentyoneToTwentynineMap
    fromWord _ = Nothing
122+
123+
-- | Hungarian words for the multiples of ten from 20 to 90, keyed by
-- their lowercase spelling. Despite the name, the values are tens, not
-- dozens.
dozensMap :: HashMap Text Integer
dozensMap = HashMap.fromList $ zip tensWords [20, 30 ..]
  where
    -- Ordered so that consecutive words map to 20, 30, ..., 90.
    tensWords :: [Text]
    tensWords =
      [ "h\x00FAsz"
      , "harminc"
      , "negyven"
      , "\x00F6tven"
      , "hatvan"
      , "hetven"
      , "nyolcvan"
      , "kilencven"
      ]
134+
135+
-- | Spelled-out multiples of ten from 20 to 90, resolved through
-- 'dozensMap'.
ruleTens :: Rule
ruleTens = Rule
  { name = "integer (20,30..90)"
  , pattern =
    [ regex "(h\x00FAsz|harminc|negyven|\x00f6tven|hatvan|hetven|nyolcvan|kilencven)"
    ]
  , prod = fromWord
  }
  where
    -- The matched word is lowercased before the lookup because the map
    -- keys are all lowercase.
    fromWord :: [Token] -> Maybe Token
    fromWord (Token RegexMatch (GroupMatch (word:_)):_) =
      integer =<< HashMap.lookup (Text.toLower word) dozensMap
    fromWord _ = Nothing
146+
147+
-- | Compound numbers written as one word: a tens word (30..90)
-- immediately followed by a unit word (1..9), e.g. "harmincnégy".
-- The value is the sum of the two parts.
ruleCompositeTens :: Rule
ruleCompositeTens = Rule
  { name = "integer ([3-9][1-9])"
  , pattern =
    [ regex "(harminc|negyven|\x00F6tven|hatvan|hetven|nyolcvan|kilencven)(egy|kett\x0151|h\x00E1rom|n\x00E9gy|\x00F6t|hat|h\x00E9t|nyolc|kilenc)"
    ]
  , prod = \tokens -> case tokens of
      -- First capture group is the tens word, second is the unit word;
      -- if either lookup fails the whole production fails.
      (Token RegexMatch (GroupMatch (tensWord:unitWord:_)):_) ->
        let tens = HashMap.lookup (Text.toLower tensWord) dozensMap
            units = HashMap.lookup (Text.toLower unitWord) ruleNumeralMap
        in (+) <$> tens <*> units >>= integer
      _ -> Nothing
  }
160+
161+
-- | All Hungarian numeral rules exported by this module.
rules :: [Rule]
rules =
  -- digits
  [ ruleIntegerNumeric
  -- spelled-out words
  , ruleNumeral, ruleElevenToNineteen, ruleTwentyoneToTwentynine
  , ruleTens, ruleCompositeTens
  ]

Duckling/Ranking/Classifiers.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ import qualified Duckling.Ranking.Classifiers.FR as FRClassifiers
2323
import qualified Duckling.Ranking.Classifiers.GA as GAClassifiers
2424
import qualified Duckling.Ranking.Classifiers.HE as HEClassifiers
2525
import qualified Duckling.Ranking.Classifiers.HR as HRClassifiers
26+
import qualified Duckling.Ranking.Classifiers.HU as HUClassifiers
2627
import qualified Duckling.Ranking.Classifiers.ID as IDClassifiers
2728
import qualified Duckling.Ranking.Classifiers.IT as ITClassifiers
2829
import qualified Duckling.Ranking.Classifiers.JA as JAClassifiers
@@ -54,6 +55,7 @@ classifiers FR = FRClassifiers.classifiers
5455
classifiers GA = GAClassifiers.classifiers
5556
classifiers HE = HEClassifiers.classifiers
5657
classifiers HR = HRClassifiers.classifiers
58+
classifiers HU = HUClassifiers.classifiers
5759
classifiers ID = IDClassifiers.classifiers
5860
classifiers IT = ITClassifiers.classifiers
5961
classifiers JA = JAClassifiers.classifiers

Duckling/Ranking/Classifiers/HU.hs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
-- Copyright (c) 2016-present, Facebook, Inc.
2+
-- All rights reserved.
3+
--
4+
-- This source code is licensed under the BSD-style license found in the
5+
-- LICENSE file in the root directory of this source tree. An additional grant
6+
-- of patent rights can be found in the PATENTS file in the same directory.
7+
8+
-----------------------------------------------------------------
9+
-- Auto-generated by regenClassifiers
10+
--
11+
-- DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
12+
-- @generated
13+
-----------------------------------------------------------------
14+
{-# LANGUAGE OverloadedStrings #-}
15+
module Duckling.Ranking.Classifiers.HU (classifiers) where
16+
import Prelude
17+
import Duckling.Ranking.Types
18+
import qualified Data.HashMap.Strict as HashMap
19+
import Data.String
20+
21+
-- | Ranking classifiers for Hungarian; the map is empty.
classifiers :: Classifiers
classifiers = HashMap.empty

Duckling/Rules.hs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ import qualified Duckling.Rules.FR as FRRules
3434
import qualified Duckling.Rules.GA as GARules
3535
import qualified Duckling.Rules.HE as HERules
3636
import qualified Duckling.Rules.HR as HRRules
37+
import qualified Duckling.Rules.HU as HURules
3738
import qualified Duckling.Rules.ID as IDRules
3839
import qualified Duckling.Rules.IT as ITRules
3940
import qualified Duckling.Rules.JA as JARules
@@ -81,6 +82,7 @@ langRules FR = FRRules.rules
8182
langRules GA = GARules.rules
8283
langRules HE = HERules.rules
8384
langRules HR = HRRules.rules
85+
langRules HU = HURules.rules
8486
langRules ID = IDRules.rules
8587
langRules IT = ITRules.rules
8688
langRules JA = JARules.rules

0 commit comments

Comments
 (0)