Skip to content

Commit 2385656

Browse files
committed
Make charset support work with C++ scanners too
1 parent 0fe5125 commit 2385656

File tree

3 files changed

+118
-27
lines changed

3 files changed

+118
-27
lines changed

src/FlexLexer.h

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -96,8 +96,16 @@ class FlexLexer {
9696
int debug() const { return yy_flex_debug; }
9797
void set_debug( int flag ) { yy_flex_debug = flag; }
9898

99+
#ifdef YY_CHARSET
100+
void set_charset(char *charset);
101+
char* get_charset();
102+
#endif
103+
99104
protected:
100105
YY_CHAR* yytext;
106+
#ifdef YY_CHARSET
107+
char *yycharset;
108+
#endif
101109
int yyleng;
102110
int yylineno; // only maintained if you use %option yylineno
103111
int yy_flex_debug; // only has effect with -d or "%option debug"
@@ -158,6 +166,15 @@ class yyFlexLexer : public FlexLexer {
158166
yy_state_type yy_try_NUL_trans( yy_state_type current_state );
159167
int yy_get_next_buffer();
160168

169+
#ifdef YY_CHARSET
170+
size_t yycharset_convert(char* source, size_t source_bytes, YY_CHAR* target,
171+
size_t target_length, size_t* converted_bytes);
172+
virtual size_t yycharset_handler(char *charset,
173+
char *source, size_t source_bytes,
174+
YY_CHAR *target, size_t target_length,
175+
size_t *converted_bytes);
176+
#endif
177+
161178
FLEX_STD istream* yyin; // input source for default LexerInput
162179
FLEX_STD ostream* yyout; // output sink for default LexerOutput
163180

src/flex.skl

Lines changed: 96 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -922,7 +922,12 @@ m4_ifdef( [[<M4_YY_BISON_LLOC>]],
922922

923923
m4_ifdef( [[M4_YY_CHARSET]], [[
924924
char *yycharset_r; /** current charset name */
925+
]])
926+
927+
m4_ifdef( [[M4_YY_CXX]],,[[
928+
m4_ifdef( [[M4_YY_CHARSET]],[[
925929
yycharset_handler_t yycharset_handler_r; /** charset handle function */
930+
]])
926931
]])
927932

928933
}; /* end struct yyguts_t */
@@ -1049,19 +1054,20 @@ m4_ifdef( [[M4_YY_REENTRANT]],[[
10491054
m4_ifdef( [[M4_YY_CHARSET]],[[
10501055
m4_ifdef( [[M4_YY_NO_GET_CHARSET]],,[[
10511056
char *yyget_charset M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG );
1057+
%if-c-only
10521058
yycharset_handler_t yyget_charset_handler M4_YY_PARAMS( M4_YY_PROTO_ONLY_ARG );
1059+
%endif
10531060
]])
10541061
]])
10551062
]])
10561063

10571064
m4_ifdef( [[M4_YY_REENTRANT]],[[
1058-
/* YY_REENTRANT */
10591065
m4_ifdef( [[M4_YY_CHARSET]], [[
1060-
/* YY_CHARSET */
10611066
m4_ifdef( [[M4_YY_NO_SET_CHARSET]],,[[
1062-
/* !YY_NO_SET_CHARSET */
10631067
void yyset_charset M4_YY_PARAMS( char *charset M4_YY_PROTO_LAST_ARG );
1068+
%if-c-only
10641069
void yyset_charset_handler M4_YY_PARAMS( yycharset_handler_t charset_handler M4_YY_PROTO_LAST_ARG );
1070+
%endif
10651071
]])
10661072
]])
10671073
]])
@@ -1689,26 +1695,47 @@ m4_ifdef( [[M4_YY_CHARSET]],[[
16891695
/* yycharset_convert - convert incoming data from arbitrary
16901696
* charset into internal representation
16911697
*/
1698+
%if-c-only
16921699
static size_t yycharset_convert YYFARGS5(
16931700
char*, source, size_t, source_bytes,
16941701
YY_CHAR*, target, size_t, target_length,
16951702
size_t*, converted_bytes) {
1703+
%endif
1704+
%if-c++-only
1705+
size_t yyFlexLexer::yycharset_convert(
1706+
char* source, size_t source_bytes,
1707+
YY_CHAR* target, size_t target_length,
1708+
size_t* converted_bytes) {
1709+
%endif
16961710
M4_YY_DECL_GUTS_VAR();
16971711
if(strcmp(yycharset, "M4_YY_CHARSET_SOURCE")==0) {
16981712
if(target_length < source_bytes)
16991713
YY_FATAL_ERROR("Too small buffer");
17001714
strncpy((char*)target, source, source_bytes);
17011715
*converted_bytes = source_bytes;
17021716
return source_bytes;
1703-
} else if(yycharset_handler)
1717+
}
1718+
%if-c-only
1719+
else if(yycharset_handler)
1720+
%endif
17041721
return yycharset_handler(yycharset, source, source_bytes,
17051722
target, target_length, converted_bytes M4_YY_CALL_LAST_ARG);
1706-
else {
1707-
char msg[256];
1708-
snprintf(msg, sizeof(msg),
1709-
"Unsupported character encoding: %s", yycharset);
1710-
YY_FATAL_ERROR(msg);
1711-
}
1723+
1724+
/* The code below reports a fatal error saying that the selected encoding
1725+
* is not supported. In the C scanner it is the tail of yycharset_convert,
1726+
* while in the C++ scanner it is the default implementation of yycharset_handler. */
1727+
%if-c++-only
1728+
}
1729+
1730+
size_t yyFlexLexer::yycharset_handler(char *charset,
1731+
char *source, size_t source_bytes,
1732+
YY_CHAR *target, size_t target_length,
1733+
size_t *converted_bytes) {
1734+
%endif
1735+
char msg[256];
1736+
snprintf(msg, sizeof(msg),
1737+
"Unsupported character encoding: %s", yycharset);
1738+
YY_FATAL_ERROR(msg);
17121739
return 0;
17131740
}
17141741
]])
@@ -2719,33 +2746,55 @@ int yyget_column YYFARGS0(void)
27192746
}
27202747
]])
27212748
]])
2749+
%endif
27222750

2723-
m4_ifdef( [[M4_YY_REENTRANT]],[[
2724-
m4_ifdef( [[M4_YY_CHARSET]], [[
2725-
m4_ifdef( [[M4_YY_NO_GET_CHARSET]],,[[
2726-
/** Get the currently set charset name
2751+
m4_ifdef([[M4_YY_CHARSET]],[[
2752+
m4_ifdef([[M4_YY_REENTRANT]],[[
2753+
2754+
m4_ifdef([[M4_YY_NO_GET_CHARSET]],,[[
2755+
m4_define([[M4_YY_GET_CHARSET]],[[
2756+
char* yyget_charset YYFARGS0(void)
2757+
]])
2758+
]])
2759+
2760+
m4_ifdef([[M4_YY_NO_GET_CHARSET_HANDLER]],,[[
2761+
m4_define([[M4_YY_GET_CHARSET_HANDLER]],[[
2762+
yycharset_handler_t yyget_charset_handler YYFARGS0(void)
2763+
]])
2764+
]])
2765+
]])
2766+
2767+
m4_ifdef([[M4_YY_CXX]], [[
2768+
m4_define( [[M4_YY_GET_CHARSET]], [[
2769+
char *FlexLexer::get_charset()
2770+
]])
2771+
]])
2772+
]])
2773+
2774+
m4_ifdef( [[M4_YY_GET_CHARSET]], [[
2775+
/** Get the current charset name
27272776
* M4_YY_DOC_PARAM
2777+
* @return charset name
27282778
*/
2729-
char *yyget_charset YYFARGS0(void)
2779+
M4_YY_GET_CHARSET
27302780
{
27312781
M4_YY_DECL_GUTS_VAR();
27322782
return yycharset;
27332783
}
27342784
]])
27352785

2736-
m4_ifdef( [[M4_YY_NO_GET_CHARSET_HANDLER]],,[[
2786+
m4_ifdef( [[M4_YY_GET_CHARSET_HANDLER]],[[
27372787
/** Get the currently set charset handler
27382788
* M4_YY_DOC_PARAM
27392789
*/
2740-
yycharset_handler_t yyget_charset_handler YYFARGS0(void)
2790+
M4_YY_GET_CHARSET_HANDLER
27412791
{
27422792
M4_YY_DECL_GUTS_VAR();
27432793
return yycharset_handler;
27442794
}
27452795
]])
2746-
]])
2747-
]])
27482796

2797+
%if-c-only
27492798
m4_ifdef( [[M4_YY_NO_GET_IN]],,
27502799
[[
27512800
/** Get the input stream.
@@ -2851,22 +2900,44 @@ void yyset_column YYFARGS1( int , column_no)
28512900
}
28522901
]])
28532902
]])
2903+
%endif
28542904

2855-
m4_ifdef( [[M4_YY_REENTRANT]],[[
2856-
m4_ifdef( [[M4_YY_CHARSET]], [[
2857-
m4_ifdef( [[M4_YY_NO_SET_CHARSET]],,[[
2905+
m4_ifdef([[M4_YY_CHARSET]], [[
2906+
m4_ifdef([[M4_YY_REENTRANT]],[[
2907+
2908+
m4_ifdef([[M4_YY_NO_SET_CHARSET]],,[[
2909+
m4_define([[M4_YY_SET_CHARSET]], [[
2910+
void yyset_charset YYFARGS1(char*, charset)
2911+
]])
2912+
]])
2913+
2914+
m4_ifdef([[M4_YY_NO_SET_CHARSET_HANDLER]],,[[
2915+
m4_define([[M4_YY_SET_CHARSET_HANDLER]],[[
2916+
void yyset_charset_handler YYFARGS1(yycharset_handler_t, charset_handler)
2917+
]])
2918+
]])
2919+
]])
2920+
2921+
m4_ifdef( [[M4_YY_CXX]],[[
2922+
m4_define( [[M4_YY_SET_CHARSET]], [[
2923+
void FlexLexer::set_charset(char *charset)
2924+
]])
2925+
]])
2926+
]])
2927+
2928+
m4_ifdef( [[M4_YY_SET_CHARSET]],[[
28582929
/** Set the current charset name
28592930
* @param charset charset name
28602931
* M4_YY_DOC_PARAM
28612932
*/
2862-
void yyset_charset YYFARGS1( char*, charset)
2933+
M4_YY_SET_CHARSET
28632934
{
28642935
M4_YY_DECL_GUTS_VAR();
28652936
yycharset = strdup(charset);
28662937
}
28672938
]])
28682939

2869-
m4_ifdef( [[M4_YY_NO_SET_CHARSET_HANDLER]],,[[
2940+
m4_ifdef( [[M4_YY_SET_CHARSET_HANDLER]],[[
28702941
/** Set the current charset handler
28712942
* @param charset_handler handler function
28722943
* M4_YY_DOC_PARAM
@@ -2877,10 +2948,8 @@ void yyset_charset_handler YYFARGS1( yycharset_handler_t, charset_handler)
28772948
yycharset_handler = charset_handler;
28782949
}
28792950
]])
2880-
]])
2881-
]])
2882-
28832951

2952+
%if-c-only
28842953
m4_ifdef( [[M4_YY_NO_SET_IN]],,
28852954
[[
28862955
/** Set the input stream. This does not discard the current

src/main.c

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -304,6 +304,8 @@ void check_options ()
304304
if (C_plus_plus && bison_bridge_lval)
305305
flexerror (_("bison bridge not supported for the C++ scanner."));
306306

307+
if(C_plus_plus)
308+
buf_m4_define( &m4defs_buf, "M4_YY_CXX", NULL);
307309

308310
if (useecs) { /* Set up doubly-linked equivalence classes. */
309311

@@ -1663,6 +1665,9 @@ void readin ()
16631665
}
16641666
OUT_END_CODE ();
16651667

1668+
if(charset_enabled)
1669+
outn ("#define YY_CHARSET");
1670+
16661671
if (C_plus_plus) {
16671672
outn ("#define yytext_ptr yytext");
16681673

0 commit comments

Comments
 (0)