12
12
from ovos_bus_client .session import SessionManager
13
13
from ovos_plugin_manager .ocp import available_extractors
14
14
from ovos_plugin_manager .templates .pipeline import IntentMatch , PipelinePlugin
15
+ from ovos_utils .lang import standardize_lang_tag , get_language_dir
15
16
from ovos_utils .log import LOG
16
17
from ovos_utils .messagebus import FakeBus
17
18
from ovos_utils .ocp import MediaType , PlaybackType , PlaybackMode , PlayerState , OCP_ID , \
18
19
MediaEntry , Playlist , MediaState , TrackState , dict2entry , PluginStream
19
20
from ovos_workshop .app import OVOSAbstractApplication
20
21
from padacioso import IntentContainer
21
-
22
+ from langcodes import closest_match
22
23
from ocp_pipeline .feats import OCPFeaturizer
23
24
from ocp_pipeline .legacy import LegacyCommonPlay
24
25
@@ -102,16 +103,18 @@ def load_classifiers(self):
102
103
def load_resource_files(self):
    """Read the intent sample files for every language in ``self.native_langs``.

    Each language tag is passed through ``standardize_lang_tag`` and the
    matching folder under this module's ``locale`` directory is resolved with
    ``get_language_dir``. Inside that folder, only files whose name is listed
    in ``self.intents`` are loaded.

    Returns:
        dict: ``{lang: {file_name: [sample, ...]}}`` keyed by the standardized
        language tag. A language for which ``get_language_dir`` returns
        ``None`` is still present in the result, mapped to an empty dict.
    """
    intents = {}
    # loop-invariant: every language is resolved against the same root folder
    locale_root = join(dirname(__file__), "locale")
    for lang in self.native_langs:
        lang = standardize_lang_tag(lang)
        intents[lang] = {}
        locale_folder = get_language_dir(locale_root, lang)
        if locale_folder is None:
            # nothing shipped for this language; keep the empty entry
            continue
        for f in os.listdir(locale_folder):
            if f not in self.intents:
                continue
            # explicit UTF-8: do not depend on the platform default encoding
            # when reading localized (possibly non-ASCII) samples
            with open(join(locale_folder, f), encoding="utf-8") as intent:
                raw = intent.read()
            # one sample per line, with doubled braces collapsed to single ones
            # NOTE(review): presumably the files escape slot braces - confirm
            intents[lang][f] = [
                s.replace("{{", "{").replace("}}", "}")
                for s in raw.split("\n")
            ]
    return intents
116
119
117
120
def register_ocp_api_events (self ):
@@ -138,6 +141,7 @@ def register_ocp_intents(self):
138
141
intent_files = self .load_resource_files ()
139
142
140
143
for lang , intent_data in intent_files .items ():
144
+ lang = standardize_lang_tag (lang )
141
145
self .intent_matchers [lang ] = IntentContainer ()
142
146
for intent_name in self .intents :
143
147
samples = intent_data .get (intent_name )
@@ -286,7 +290,8 @@ def handle_player_state_update(self, message: Message):
286
290
def match_high (self , utterances : List [str ], lang : str , message : Message = None ) -> Optional [IntentMatch ]:
287
291
""" exact matches only, handles playback control
288
292
recommended after high confidence intents pipeline stage """
289
- if lang not in self .intent_matchers :
293
+ lang = self ._get_closest_lang (lang )
294
+ if lang is None : # no intents registered for this lang
290
295
return None
291
296
292
297
self .bus .emit (Message ("ovos.common_play.status" )) # sync
@@ -327,6 +332,8 @@ def match_high(self, utterances: List[str], lang: str, message: Message = None)
327
332
def match_medium (self , utterances : List [str ], lang : str , message : Message = None ) -> Optional [IntentMatch ]:
328
333
""" match a utterance via classifiers,
329
334
recommended before common_qa pipeline stage"""
335
+ lang = standardize_lang_tag (lang )
336
+
330
337
utterance = utterances [0 ].lower ()
331
338
# is this a OCP query ?
332
339
is_ocp , bconf = self .is_ocp_query (utterance , lang )
@@ -368,6 +375,8 @@ def match_fallback(self, utterances: List[str], lang: str, message: Message = No
368
375
if not ents :
369
376
return None
370
377
378
+ lang = standardize_lang_tag (lang )
379
+
371
380
# classify the query media type
372
381
media_type , confidence = self .classify_media (utterance , lang )
373
382
@@ -388,7 +397,7 @@ def match_fallback(self, utterances: List[str], lang: str, message: Message = No
388
397
389
398
def _process_play_query (self , utterance : str , lang : str , match : dict = None ,
390
399
message : Optional [Message ] = None ) -> Optional [IntentMatch ]:
391
-
400
+ lang = standardize_lang_tag ( lang )
392
401
match = match or {}
393
402
player = self .get_player (message )
394
403
# if media is currently paused, empty string means "resume playback"
@@ -455,6 +464,7 @@ def handle_search_query(self, message: Message):
455
464
if num :
456
465
phrase += " " + num
457
466
467
+ lang = standardize_lang_tag (lang )
458
468
# classify the query media type
459
469
media_type , prob = self .classify_media (utterance , lang )
460
470
# search common play skills
@@ -503,6 +513,7 @@ def handle_play_intent(self, message: Message):
503
513
skills = message .data .get ("skills" , [])
504
514
505
515
# search common play skills
516
+ lang = standardize_lang_tag (lang )
506
517
results = self ._search (query , media_type , lang ,
507
518
skills = skills , message = message )
508
519
@@ -613,6 +624,7 @@ def handle_search_error_intent(self, message: Message):
613
624
614
625
# NLP
615
626
def voc_match_media (self , query : str , lang : str ) -> Tuple [MediaType , float ]:
627
+ lang = standardize_lang_tag (lang )
616
628
# simplistic approach via voc_match, works anywhere
617
629
# and it's easy to localize, but isn't very accurate
618
630
if self .voc_match (query , "MusicKeyword" , lang = lang ):
@@ -674,6 +686,7 @@ def voc_match_media(self, query: str, lang: str) -> Tuple[MediaType, float]:
674
686
675
687
def classify_media (self , query : str , lang : str ) -> Tuple [MediaType , float ]:
676
688
""" determine what media type is being requested """
689
+ lang = standardize_lang_tag (lang )
677
690
# using a trained classifier (Experimental)
678
691
if self .config .get ("experimental_media_classifier" , False ):
679
692
from ovos_classifiers .skovos .classifier import SklearnOVOSClassifier
@@ -701,6 +714,7 @@ def classify_media(self, query: str, lang: str) -> Tuple[MediaType, float]:
701
714
702
715
def is_ocp_query (self , query : str , lang : str ) -> Tuple [bool , float ]:
703
716
""" determine if a playback question is being asked"""
717
+ lang = standardize_lang_tag (lang )
704
718
if self .config .get ("experimental_binary_classifier" , False ):
705
719
from ovos_classifiers .skovos .classifier import SklearnOVOSClassifier
706
720
try :
@@ -731,6 +745,7 @@ def _should_resume(self, phrase: str, lang: str, message: Optional[Message] = No
731
745
@param phrase: Extracted playback phrase
732
746
@return: True if player should resume, False if this is a new request
733
747
"""
748
+ lang = standardize_lang_tag (lang )
734
749
player = self .get_player (message )
735
750
if player .player_state == PlayerState .PAUSED :
736
751
if not phrase .strip () or \
@@ -782,6 +797,7 @@ def normalize_results(self, results: list) -> List[Union[MediaEntry, Playlist, P
782
797
def filter_results (self , results : list , phrase : str , lang : str ,
783
798
media_type : MediaType = MediaType .GENERIC ,
784
799
message : Optional [Message ] = None ) -> list :
800
+ lang = standardize_lang_tag (lang )
785
801
# ignore very low score matches
786
802
l1 = len (results )
787
803
results = [r for r in results
@@ -1031,6 +1047,10 @@ def match_legacy(self, utterances: List[str], lang: str, message: Message = None
1031
1047
1032
1048
utterance = utterances [0 ].lower ()
1033
1049
1050
+ lang = self ._get_closest_lang (lang )
1051
+ if lang is None : # no intents registered for this lang
1052
+ return None
1053
+
1034
1054
match = self .intent_matchers [lang ].calc_intent (utterance )
1035
1055
1036
1056
if match ["name" ] is None :
@@ -1045,6 +1065,18 @@ def match_legacy(self, utterances: List[str], lang: str, message: Message = None
1045
1065
skill_id = OCP_ID ,
1046
1066
utterance = utterance )
1047
1067
1068
def _get_closest_lang(self, lang: str) -> Optional[str]:
    """Map *lang* onto the closest language key of ``self.intent_matchers``.

    Args:
        lang: language tag to resolve; it is standardized before matching.

    Returns:
        The closest key of ``self.intent_matchers`` when its distance to
        *lang* is below 10, otherwise ``None``. Also ``None`` when
        ``self.intent_matchers`` is empty.
    """
    if not self.intent_matchers:
        return None

    wanted = standardize_lang_tag(lang)
    candidates = list(self.intent_matchers.keys())
    best, distance = closest_match(wanted, candidates)
    # Distance scale, see
    # https://langcodes-hickford.readthedocs.io/en/sphinx/index.html#distance-values
    #   0      -> same language, possibly after filling in values and normalizing
    #   1 - 3  -> minor regional difference
    #   4 - 10 -> significant but unproblematic regional difference
    return best if distance < 10 else None
1079
+
1048
1080
def handle_legacy_cps (self , message : Message ):
1049
1081
"""intent handler for legacy CPS matches"""
1050
1082
utt = message .data ["query" ]
0 commit comments