Skip to content

Commit bda0314

Browse files
authored
Merge pull request #15 from suntong/himcc-master
Further on to Himcc master
2 parents 5f14a39 + 0fd2ec0 commit bda0314

File tree

5 files changed

+158
-35
lines changed

5 files changed

+158
-35
lines changed

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,11 @@ Options:
5050
-t, --text Text output for none-block selection mode
5151
-R, --Raw Raw text output, no trimming of leading and trailing white space
5252
-p, --piece sub CSS selectors within -css to split that block up into pieces
53-
format: PieceName=[RAW:]selector_string
54-
RAW: will return the selected as-is; else the text will be returned
53+
format: PieceName=[OutputStyle:]selector_string
54+
OutputStyle:
55+
RAW : will return the selected as-is
56+
attr[xxx] : will return the value of an attribute named xxx
57+
else the text will be returned
5558
-d, --delimiter delimiter for pieces csv output [= ]
5659
-w, --wrap-html wrap up the output with html tags
5760
-y, --style style component within the wrapped html head

cascadia_cli.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,9 @@ Options:
3939
Usage: Raw text output, no trimming of leading and trailing white space
4040

4141
- Name: Piece
42-
Type: MapStringString
42+
Type: OutputStyleMap
4343
Flag: 'p,piece'
44-
Usage: 'sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[RAW:]selector_string\n\t\t\tRAW: will return the selected as-is; else the text will be returned'
44+
Usage: 'sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[OutputStyle:]selector_string\n\t\t\tOutputStyle:\n\t\t\t\tRAW : will return the selected as-is\n\t\t\t\tattr[xxx] : will return the value of an attribute named xxx \n\t\t\telse the text will be returned'
4545

4646
- Name: Deli
4747
Type: string

cascadia_cliDef.go

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
////////////////////////////////////////////////////////////////////////////
22
// Program: cascadiaC
33
// Purpose: cascadia wrapper
4-
// Authors: Tong Sun (c) 2021, All rights reserved
4+
// Authors: Tong Sun (c) 2023, All rights reserved
55
////////////////////////////////////////////////////////////////////////////
66

77
package main
@@ -23,23 +23,23 @@ import (
2323

2424
type rootT struct {
2525
cli.Helper
26-
Filei *clix.Reader `cli:"*i,in" usage:"The html/xml file to read from (or stdin)"`
27-
Fileo *clix.Writer `cli:"*o,out" usage:"The output file (or stdout)"`
28-
CSS []string `cli:"*c,css" usage:"CSS selectors (can provide more if not using --piece)"`
29-
TextOut bool `cli:"t,text" usage:"Text output for none-block selection mode"`
30-
TextRaw bool `cli:"R,Raw" usage:"Raw text output, no trimming of leading and trailing white space"`
31-
Piece MapStringString `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[RAW:]selector_string\n\t\t\tRAW: will return the selected as-is; else the text will be returned"`
32-
Deli string `cli:"d,delimiter" usage:"delimiter for pieces csv output" dft:"\t"`
33-
WrapHTML bool `cli:"w,wrap-html" usage:"wrap up the output with html tags"`
34-
Style string `cli:"y,style" usage:"style component within the wrapped html head"`
35-
Base string `cli:"b,base" usage:"base href tag used in the wrapped up html"`
36-
Quiet bool `cli:"q,quiet" usage:"be quiet"`
26+
Filei *clix.Reader `cli:"*i,in" usage:"The html/xml file to read from (or stdin)"`
27+
Fileo *clix.Writer `cli:"*o,out" usage:"The output file (or stdout)"`
28+
CSS []string `cli:"*c,css" usage:"CSS selectors (can provide more if not using --piece)"`
29+
TextOut bool `cli:"t,text" usage:"Text output for none-block selection mode"`
30+
TextRaw bool `cli:"R,Raw" usage:"Raw text output, no trimming of leading and trailing white space"`
31+
Piece OutputStyleMap `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[OutputStyle:]selector_string\n\t\t\tOutputStyle:\n\t\t\t\tRAW : will return the selected as-is\n\t\t\t\tattr[xxx] : will return the value of an attribute named xxx \n\t\t\telse the text will be returned"`
32+
Deli string `cli:"d,delimiter" usage:"delimiter for pieces csv output" dft:"\t"`
33+
WrapHTML bool `cli:"w,wrap-html" usage:"wrap up the output with html tags"`
34+
Style string `cli:"y,style" usage:"style component within the wrapped html head"`
35+
Base string `cli:"b,base" usage:"base href tag used in the wrapped up html"`
36+
Quiet bool `cli:"q,quiet" usage:"be quiet"`
3737
}
3838

3939
var root = &cli.Command{
4040
Name: "cascadiaC",
4141
Desc: "cascadia wrapper\nVersion " + version + " built on " + date +
42-
"\nCopyright (C) 2021, Tong Sun",
42+
"\nCopyright (C) 2023, Tong Sun",
4343
Text: "Command line interface to go cascadia CSS selectors package" +
4444
"\n\nUsage:\n cascadia -i in -c css -o [Options...]",
4545
Argv: func() interface{} { return new(rootT) },
@@ -59,7 +59,7 @@ var root = &cli.Command{
5959
// CSS []string
6060
// TextOut bool
6161
// TextRaw bool
62-
// Piece MapStringString
62+
// Piece OutputStyleMap
6363
// Deli string
6464
// WrapHTML bool
6565
// Style string
@@ -74,7 +74,7 @@ var root = &cli.Command{
7474
// var (
7575
// progname = "cascadiaC"
7676
// version = "0.1.0"
77-
// date = "2021-11-27"
77+
// date = "2023-01-08"
7878

7979
// rootArgv *rootT
8080
// // Opts store all the configurable options

cascadia_main.go

Lines changed: 39 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,31 @@ import (
2626
// Constant and data type/structure definitions
2727

2828
const (
29-
IsRaw = "RAW:"
29+
IsRaw = "RAW"
3030
WrapHTMLEnd = `</body>`
3131
)
3232

33-
type MapStringString struct {
34-
Keys []string
35-
Values map[string]string
36-
Raw map[string]bool
33+
// OutputStyle selects how the node matched by a --piece sub-selector is
// written to the output.
type OutputStyle int
34+
35+
// Output styles handled by Cascadia. OutputStyleRAW is the zero value, so
// looking up a key that is absent from an OutputStyles map yields
// OutputStyleRAW.
const (
36+
// OutputStyleRAW renders the first matched node as HTML.
OutputStyleRAW OutputStyle = iota
37+
// OutputStyleATTR writes the value of the attribute recorded in AttrName,
// or an empty string when the attribute is not present.
OutputStyleATTR
38+
// OutputStyleTEXT writes the text of the matched node's contents.
OutputStyleTEXT
39+
)
40+
41+
// OutputStyleMap holds the --piece definitions collected by Decode. All three
// maps are indexed by piece name; Keys preserves the order in which the pieces
// were given.
type OutputStyleMap struct {
42+
// Keys lists the piece names in the order Decode received them.
Keys []string
43+
// Values maps a piece name to its CSS sub-selector (any OutputStyle
// prefix already removed).
Values map[string]string
44+
// OutputStyles maps a piece name to the way its selection is written out.
OutputStyles map[string]OutputStyle
45+
// AttrName maps a piece name to the attribute to read; Decode fills it
// only for pieces given in the attr[xxx] form.
AttrName map[string]string
3746
}
3847

3948
// The OptsT type defines all the configurable options from cli.
4049
type OptsT struct {
4150
CSS []string
4251
TextOut bool
4352
TextRaw bool
44-
Piece MapStringString
53+
Piece OutputStyleMap
4554
Deli string
4655
WrapHTML bool
4756
Style string
@@ -174,10 +183,14 @@ func Cascadia(bi io.Reader, bw io.Writer, Opts OptsT) error {
174183
//fmt.Printf("] #%d: %s\n", index, item.Text())
175184
for _, key := range piece.Keys {
176185
//fmt.Printf("] %s: %s\n", key, piece.Values[key])
177-
if piece.Raw[key] {
186+
switch piece.OutputStyles[key] {
187+
case OutputStyleRAW:
178188
html.Render(bw, item.Find(piece.Values[key]).Get(0))
179189
fmt.Fprintf(bw, deli)
180-
} else {
190+
case OutputStyleATTR:
191+
fmt.Fprintf(bw, "%s%s",
192+
item.Find(piece.Values[key]).AttrOr(piece.AttrName[key], ""), deli)
193+
case OutputStyleTEXT:
181194
fmt.Fprintf(bw, "%s%s",
182195
item.Find(piece.Values[key]).Contents().Text(), deli)
183196
}
@@ -196,23 +209,35 @@ func Cascadia(bi io.Reader, bw io.Writer, Opts OptsT) error {
196209

197210
// DecodeSlice implements cli.SliceDecoder
198211
// NOTE: if SliceDecoder not implemented, the Decode method would be only invoked once
199-
func (MapStringString) DecodeSlice() {}
212+
func (OutputStyleMap) DecodeSlice() {}
200213

201214
// Decode implements cli.Decoder interface
202-
func (m *MapStringString) Decode(s string) error {
215+
func (m *OutputStyleMap) Decode(s string) error {
203216
if (m.Values) == nil {
204217
m.Values = make(map[string]string)
205-
m.Raw = make(map[string]bool)
218+
m.OutputStyles = make(map[string]OutputStyle)
219+
m.AttrName = make(map[string]string)
206220
}
207221
matches := regexp.MustCompile("(.*)=(.*)").FindStringSubmatch(s)
208222
if len(matches) < 2 {
209223
return errors.New("format error. To get help, run: " + progname)
210224
}
211225
key := matches[1]
212226
val := matches[2]
213-
if len(val) >= 4 && val[:4] == IsRaw {
214-
m.Raw[key] = true
215-
val = val[4:]
227+
index := strings.Index(val, ":")
228+
if index > 0 {
229+
style := val[:index]
230+
val = val[index+1:]
231+
if style == IsRaw {
232+
m.OutputStyles[key] = OutputStyleRAW
233+
} else if strings.HasPrefix(style, "attr[") && strings.HasSuffix(style, "]") {
234+
m.OutputStyles[key] = OutputStyleATTR
235+
m.AttrName[key] = style[5 : len(style)-1]
236+
} else {
237+
m.OutputStyles[key] = OutputStyleTEXT
238+
}
239+
} else {
240+
m.OutputStyles[key] = OutputStyleTEXT
216241
}
217242
m.Keys = append(m.Keys, key)
218243
m.Values[key] = val

cascadia_test.go

Lines changed: 97 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ func TestSelectors(t *testing.T) {
1111
buf := bytes.NewBufferString("")
1212
Opts.CSS, Opts.Piece, Opts.Deli,
1313
Opts.WrapHTML, Opts.TextOut, Opts.TextRaw, Opts.Quiet =
14-
[]string{test.selector}, MapStringString{}, ",",
14+
[]string{test.selector}, OutputStyleMap{}, ",",
1515
false, false, false, false
1616
Cascadia(strings.NewReader(test.HTML), buf, Opts)
1717
got := buf.String()
@@ -26,7 +26,7 @@ func TestSelectors(t *testing.T) {
2626
}
2727
}
2828

29-
////////////////////////////////////////////////////////////////////////////
29+
// //////////////////////////////////////////////////////////////////////////
3030
// The following is taken from
3131
// https://github.com/andybalholm/cascadia/blob/c56252c33997d9b9991f4c1e3b5fbc82d6d656b3/selector_test.go
3232
type selectorTest struct {
@@ -549,3 +549,98 @@ var selectorTests = []selectorTest{
549549
},
550550
},
551551
}
552+
553+
// PieceAttrTest is one table entry for TestPieceAttr.
type PieceAttrTest struct {
554+
// HTML is the input document; selector is the block-level CSS selector
// passed to Cascadia as the only element of Opts.CSS.
HTML, selector string
555+
// results are the expected output lines; the test joins them with "\n"
// and appends a trailing newline before comparing.
results []string
556+
// piece is the piece definition assigned to Opts.Piece.
piece OutputStyleMap
557+
}
558+
559+
// PieceAttrTests exercises the attr[xxx] output style. Every case selects the
// <li> elements of the same four-link list and reads one attribute from the
// <a> inside each; the first expected line is the piece name followed by the
// delimiter, then one line per <li>.
var PieceAttrTests = []PieceAttrTest{
560+
// Piece "id" reads the id attribute, equivalent to `id=attr[id]:a`.
{
561+
`<ul>
562+
<li><a id="a1" href="http://www.google.com/finance"/>
563+
<li><a id="a2" href="http://finance.yahoo.com/"/>
564+
<li><a id="a3" href="https://www.google.com/news"></a>
565+
<li><a id="a4" href="http://news.yahoo.com"/>
566+
</ul>`,
567+
`li`,
568+
[]string{
569+
`id,`,
570+
`a1,`,
571+
`a2,`,
572+
`a3,`,
573+
`a4,`,
574+
},
575+
OutputStyleMap{
576+
[]string{"id"},
577+
map[string]string{"id": "a"},
578+
map[string]OutputStyle{"id": OutputStyleATTR},
579+
map[string]string{"id": "id"},
580+
},
581+
},
582+
// Piece "href2" names an attribute none of the links carry, so every
// data row is expected to be empty apart from the delimiter.
{
583+
`<ul>
584+
<li><a id="a1" href="http://www.google.com/finance"/>
585+
<li><a id="a2" href="http://finance.yahoo.com/"/>
586+
<li><a id="a3" href="https://www.google.com/news"></a>
587+
<li><a id="a4" href="http://news.yahoo.com"/>
588+
</ul>`,
589+
`li`,
590+
[]string{
591+
`href2,`,
592+
`,`,
593+
`,`,
594+
`,`,
595+
`,`,
596+
},
597+
OutputStyleMap{
598+
[]string{"href2"},
599+
map[string]string{"href2": "a"},
600+
map[string]OutputStyle{"href2": OutputStyleATTR},
601+
map[string]string{"href2": "href2"},
602+
},
603+
},
604+
// Piece "href" reads the href attribute of each link.
{
605+
`<ul>
606+
<li><a id="a1" href="http://www.google.com/finance"/>
607+
<li><a id="a2" href="http://finance.yahoo.com/"/>
608+
<li><a id="a3" href="https://www.google.com/news"></a>
609+
<li><a id="a4" href="http://news.yahoo.com"/>
610+
</ul>`,
611+
`li`,
612+
[]string{
613+
`href,`,
614+
`http://www.google.com/finance,`,
615+
`http://finance.yahoo.com/,`,
616+
`https://www.google.com/news,`,
617+
`http://news.yahoo.com,`,
618+
},
619+
OutputStyleMap{
620+
[]string{"href"},
621+
map[string]string{"href": "a"},
622+
map[string]OutputStyle{"href": OutputStyleATTR},
623+
map[string]string{"href": "href"},
624+
},
625+
},
626+
}
627+
628+
// TestPieceAttr runs Cascadia once per PieceAttrTests entry with "," as the
// delimiter and compares the bytes written to the buffer with the expected
// lines.
func TestPieceAttr(t *testing.T) {
629+
for _, test := range PieceAttrTests {
630+
buf := bytes.NewBufferString("")
631+
// Configure the Opts value declared outside this function: one block
// selector, the case's piece map, and every boolean switch off.
Opts.CSS, Opts.Piece, Opts.Deli,
632+
Opts.WrapHTML, Opts.TextOut, Opts.TextRaw, Opts.Quiet =
633+
[]string{test.selector}, test.piece, ",",
634+
false, false, false, false
635+
// NOTE(review): the error returned by Cascadia is discarded here; a
// failure would only surface as an output mismatch below.
Cascadia(strings.NewReader(test.HTML), buf, Opts)
636+
got := buf.String()
637+
if len(got) == 0 && len(test.results) == 0 {
638+
// Nothing expected and nothing produced: the case passes.
639+
continue
640+
}
641+
// The expected output is the result lines, each terminated by "\n".
want := strings.Join(test.results, "\n") + "\n"
642+
if got != want {
643+
t.Errorf("wanted %s, got %s instead", want, got)
644+
}
645+
}
646+
}

0 commit comments

Comments
 (0)