Skip to content

Commit 7461e0c

Browse files
committed
add piece attr output
1 parent 5f14a39 commit 7461e0c

File tree

4 files changed

+137
-14
lines changed

4 files changed

+137
-14
lines changed

README.md

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -50,8 +50,11 @@ Options:
5050
-t, --text Text output for none-block selection mode
5151
-R, --Raw Raw text output, no trimming of leading and trailing white space
5252
-p, --piece sub CSS selectors within -css to split that block up into pieces
53-
format: PieceName=[RAW:]selector_string
54-
RAW: will return the selected as-is; else the text will be returned
53+
format: PieceName=[OutputStyle:]selector_string
54+
OutputStyle:
55+
RAW : will return the selected as-is
56+
attr[xxx] : will return the value of an attribute named xxx
57+
else the text will be returned
5558
-d, --delimiter delimiter for pieces csv output [= ]
5659
-w, --wrap-html wrap up the output with html tags
5760
-y, --style style component within the wrapped html head

cascadia_cliDef.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ type rootT struct {
2828
CSS []string `cli:"*c,css" usage:"CSS selectors (can provide more if not using --piece)"`
2929
TextOut bool `cli:"t,text" usage:"Text output for none-block selection mode"`
3030
TextRaw bool `cli:"R,Raw" usage:"Raw text output, no trimming of leading and trailing white space"`
31-
Piece MapStringString `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[RAW:]selector_string\n\t\t\tRAW: will return the selected as-is; else the text will be returned"`
31+
Piece MapStringString `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[OutputStyle:]selector_string\n\t\t\tOutputStyle:\n\t\t\t\tRAW : will return the selected as-is\n\t\t\t\tattr[xxx] : will return the value of an attribute named xxx \n\t\t\telse the text will be returned"`
3232
Deli string `cli:"d,delimiter" usage:"delimiter for pieces csv output" dft:"\t"`
3333
WrapHTML bool `cli:"w,wrap-html" usage:"wrap up the output with html tags"`
3434
Style string `cli:"y,style" usage:"style component within the wrapped html head"`

cascadia_main.go

Lines changed: 35 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,23 @@ import (
2626
// Constant and data type/structure definitions
2727

2828
const (
	// IsRaw is the OutputStyle keyword of a --piece value
	// (PieceName=[OutputStyle:]selector_string) that asks for the selected
	// node to be returned as-is. It no longer carries the trailing ':'; the
	// separator is stripped before the keyword is compared.
	IsRaw = "RAW"
	// WrapHTMLEnd is the closing body tag.
	// NOTE(review): presumably emitted for --wrap-html; the use site is not
	// visible in this diff, confirm before relying on it.
	WrapHTMLEnd = `</body>`
)
3232

33+
// OutputStyle selects how the match of one --piece selector is written out.
type OutputStyle int

// The supported output styles.
//
// OutputStyleRAW is the zero value, so looking up a key that is absent from
// a map[string]OutputStyle yields OutputStyleRAW, not OutputStyleTEXT. Code
// that builds such maps should record a style for every key.
const (
	// OutputStyleRAW renders the first matched node as HTML.
	OutputStyleRAW OutputStyle = iota
	// OutputStyleATTR writes the value of a named attribute of the match,
	// or the empty string when the attribute is missing.
	OutputStyleATTR
	// OutputStyleTEXT writes the text of the match's contents.
	OutputStyleTEXT
)
40+
3341
type MapStringString struct {
34-
Keys []string
35-
Values map[string]string
36-
Raw map[string]bool
42+
Keys []string
43+
Values map[string]string
44+
OutputStyles map[string]OutputStyle
45+
AttrName map[string]string
3746
}
3847

3948
// The OptsT type defines all the configurable options from cli.
@@ -174,10 +183,14 @@ func Cascadia(bi io.Reader, bw io.Writer, Opts OptsT) error {
174183
//fmt.Printf("] #%d: %s\n", index, item.Text())
175184
for _, key := range piece.Keys {
176185
//fmt.Printf("] %s: %s\n", key, piece.Values[key])
177-
if piece.Raw[key] {
186+
switch piece.OutputStyles[key] {
187+
case OutputStyleRAW:
178188
html.Render(bw, item.Find(piece.Values[key]).Get(0))
179189
fmt.Fprintf(bw, deli)
180-
} else {
190+
case OutputStyleATTR:
191+
fmt.Fprintf(bw, "%s%s",
192+
item.Find(piece.Values[key]).AttrOr(piece.AttrName[key], ""), deli)
193+
case OutputStyleTEXT:
181194
fmt.Fprintf(bw, "%s%s",
182195
item.Find(piece.Values[key]).Contents().Text(), deli)
183196
}
@@ -202,17 +215,29 @@ func (MapStringString) DecodeSlice() {}
202215
func (m *MapStringString) Decode(s string) error {
203216
if (m.Values) == nil {
204217
m.Values = make(map[string]string)
205-
m.Raw = make(map[string]bool)
218+
m.OutputStyles = make(map[string]OutputStyle)
219+
m.AttrName = make(map[string]string)
206220
}
207221
matches := regexp.MustCompile("(.*)=(.*)").FindStringSubmatch(s)
208222
if len(matches) < 2 {
209223
return errors.New("format error. To get help, run: " + progname)
210224
}
211225
key := matches[1]
212226
val := matches[2]
213-
if len(val) >= 4 && val[:4] == IsRaw {
214-
m.Raw[key] = true
215-
val = val[4:]
227+
index := strings.Index(val, ":")
228+
if index > 0 {
229+
style := val[:index]
230+
val = val[index+1:]
231+
if style == IsRaw {
232+
m.OutputStyles[key] = OutputStyleRAW
233+
} else if strings.HasPrefix(style, "attr[") && strings.HasSuffix(style, "]") {
234+
m.OutputStyles[key] = OutputStyleATTR
235+
m.AttrName[key] = style[5 : len(style)-1]
236+
} else {
237+
m.OutputStyles[key] = OutputStyleTEXT
238+
}
239+
} else {
240+
m.OutputStyles[key] = OutputStyleTEXT
216241
}
217242
m.Keys = append(m.Keys, key)
218243
m.Values[key] = val

cascadia_test.go

Lines changed: 96 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ func TestSelectors(t *testing.T) {
2626
}
2727
}
2828

29-
////////////////////////////////////////////////////////////////////////////
29+
// //////////////////////////////////////////////////////////////////////////
3030
// The following is taken from
3131
// https://github.com/andybalholm/cascadia/blob/c56252c33997d9b9991f4c1e3b5fbc82d6d656b3/selector_test.go
3232
type selectorTest struct {
@@ -549,3 +549,98 @@ var selectorTests = []selectorTest{
549549
},
550550
},
551551
}
552+
553+
type PieceAttrTest struct {
554+
HTML, selector string
555+
results []string
556+
piece MapStringString
557+
}
558+
559+
var PieceAttrTests = []PieceAttrTest{
560+
{
561+
`<ul>
562+
<li><a id="a1" href="http://www.google.com/finance"/>
563+
<li><a id="a2" href="http://finance.yahoo.com/"/>
564+
<li><a id="a3" href="https://www.google.com/news"></a>
565+
<li><a id="a4" href="http://news.yahoo.com"/>
566+
</ul>`,
567+
`li`,
568+
[]string{
569+
`id,`,
570+
`a1,`,
571+
`a2,`,
572+
`a3,`,
573+
`a4,`,
574+
},
575+
MapStringString{
576+
[]string{"id"},
577+
map[string]string{"id": "a"},
578+
map[string]OutputStyle{"id": OutputStyleATTR},
579+
map[string]string{"id": "id"},
580+
},
581+
},
582+
{
583+
`<ul>
584+
<li><a id="a1" href="http://www.google.com/finance"/>
585+
<li><a id="a2" href="http://finance.yahoo.com/"/>
586+
<li><a id="a3" href="https://www.google.com/news"></a>
587+
<li><a id="a4" href="http://news.yahoo.com"/>
588+
</ul>`,
589+
`li`,
590+
[]string{
591+
`href2,`,
592+
`,`,
593+
`,`,
594+
`,`,
595+
`,`,
596+
},
597+
MapStringString{
598+
[]string{"href2"},
599+
map[string]string{"href2": "a"},
600+
map[string]OutputStyle{"href2": OutputStyleATTR},
601+
map[string]string{"href2": "href2"},
602+
},
603+
},
604+
{
605+
`<ul>
606+
<li><a id="a1" href="http://www.google.com/finance"/>
607+
<li><a id="a2" href="http://finance.yahoo.com/"/>
608+
<li><a id="a3" href="https://www.google.com/news"></a>
609+
<li><a id="a4" href="http://news.yahoo.com"/>
610+
</ul>`,
611+
`li`,
612+
[]string{
613+
`href,`,
614+
`http://www.google.com/finance,`,
615+
`http://finance.yahoo.com/,`,
616+
`https://www.google.com/news,`,
617+
`http://news.yahoo.com,`,
618+
},
619+
MapStringString{
620+
[]string{"href"},
621+
map[string]string{"href": "a"},
622+
map[string]OutputStyle{"href": OutputStyleATTR},
623+
map[string]string{"href": "href"},
624+
},
625+
},
626+
}
627+
628+
func TestPieceAttr(t *testing.T) {
629+
for _, test := range PieceAttrTests {
630+
buf := bytes.NewBufferString("")
631+
Opts.CSS, Opts.Piece, Opts.Deli,
632+
Opts.WrapHTML, Opts.TextOut, Opts.TextRaw, Opts.Quiet =
633+
[]string{test.selector}, test.piece, ",",
634+
false, false, false, false
635+
Cascadia(strings.NewReader(test.HTML), buf, Opts)
636+
got := buf.String()
637+
if len(got) == 0 && len(test.results) == 0 {
638+
// correct
639+
continue
640+
}
641+
want := strings.Join(test.results, "\n") + "\n"
642+
if got != want {
643+
t.Errorf("wanted %s, got %s instead", want, got)
644+
}
645+
}
646+
}

0 commit comments

Comments
 (0)