Skip to content

Commit 7c00a1e

Browse files
committed
- [#] update --piece handling; fix #3
1 parent a801f4d commit 7c00a1e

File tree

5 files changed

+36
-40
lines changed

5 files changed

+36
-40
lines changed

cascadia_cli.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
# program name, name for the executable
22
ProgramName: cascadia
33
Authors: Tong Sun
4+
Since: 2016
45

56
PackageName: main
67

@@ -41,7 +42,7 @@ Options:
4142
- Name: Piece
4243
Type: PieceStyleMap
4344
Flag: 'p,piece'
44-
Usage: 'sub CSS selectors within -css to split that block up into pieces\n\t\t\t\tformat: PieceName=[PieceStyle:]selector_string\n\t\t\t\t PieceStyle:\n\t\t\t\t RAW : will return the selected as-is\n\t\t\t\t attr[xx] : will return the value of xx attribute\n\t\t\t\telse the text will be returned'
45+
Usage: 'sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[PieceStyle:]selector_string\n\t\t\t PieceStyle:\n\t\t\t RAW : will return the selected as-is\n\t\t\t ATTR : will return the value of attribute selector_string\n\t\t\t Else the text will be returned'
4546

4647
- Name: Deli
4748
Type: string

cascadia_cliDef.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
////////////////////////////////////////////////////////////////////////////
22
// Program: cascadiaC
33
// Purpose: cascadia wrapper
4-
// Authors: Tong Sun (c) 2023, All rights reserved
4+
// Authors: Tong Sun (c) 2016-2023, All rights reserved
55
////////////////////////////////////////////////////////////////////////////
66

77
package main
@@ -23,23 +23,23 @@ import (
2323

2424
type rootT struct {
2525
cli.Helper
26-
Filei *clix.Reader `cli:"*i,in" usage:"The html/xml file to read from (or stdin)"`
27-
Fileo *clix.Writer `cli:"*o,out" usage:"The output file (or stdout)"`
28-
CSS []string `cli:"*c,css" usage:"CSS selectors (can provide more if not using --piece)"`
29-
TextOut bool `cli:"t,text" usage:"Text output for none-block selection mode"`
30-
TextRaw bool `cli:"R,Raw" usage:"Raw text output, no trimming of leading and trailing white space"`
31-
Piece PieceStyleMap `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\t\tformat: PieceName=[PieceStyle:]selector_string\n\t\t\t\t PieceStyle:\n\t\t\t\t RAW : will return the selected as-is\n\t\t\t\t attr[xx] : will return the value of xx attribute\n\t\t\t\telse the text will be returned"`
32-
Deli string `cli:"d,delimiter" usage:"delimiter for pieces csv output" dft:"\t"`
33-
WrapHTML bool `cli:"w,wrap-html" usage:"wrap up the output with html tags"`
34-
Style string `cli:"y,style" usage:"style component within the wrapped html head"`
35-
Base string `cli:"b,base" usage:"base href tag used in the wrapped up html"`
36-
Quiet bool `cli:"q,quiet" usage:"be quiet"`
26+
Filei *clix.Reader `cli:"*i,in" usage:"The html/xml file to read from (or stdin)"`
27+
Fileo *clix.Writer `cli:"*o,out" usage:"The output file (or stdout)"`
28+
CSS []string `cli:"*c,css" usage:"CSS selectors (can provide more if not using --piece)"`
29+
TextOut bool `cli:"t,text" usage:"Text output for none-block selection mode"`
30+
TextRaw bool `cli:"R,Raw" usage:"Raw text output, no trimming of leading and trailing white space"`
31+
Piece PieceStyleMap `cli:"p,piece" usage:"sub CSS selectors within -css to split that block up into pieces\n\t\t\tformat: PieceName=[PieceStyle:]selector_string\n\t\t\t PieceStyle:\n\t\t\t RAW : will return the selected as-is\n\t\t\t ATTR : will return the value of attribute selector_string\n\t\t\t Else the text will be returned"`
32+
Deli string `cli:"d,delimiter" usage:"delimiter for pieces csv output" dft:"\t"`
33+
WrapHTML bool `cli:"w,wrap-html" usage:"wrap up the output with html tags"`
34+
Style string `cli:"y,style" usage:"style component within the wrapped html head"`
35+
Base string `cli:"b,base" usage:"base href tag used in the wrapped up html"`
36+
Quiet bool `cli:"q,quiet" usage:"be quiet"`
3737
}
3838

3939
var root = &cli.Command{
4040
Name: "cascadiaC",
4141
Desc: "cascadia wrapper\nVersion " + version + " built on " + date +
42-
"\nCopyright (C) 2023, Tong Sun",
42+
"\nCopyright (C) 2016-2023, Tong Sun",
4343
Text: "Command line interface to go cascadia CSS selectors package" +
4444
"\n\nUsage:\n cascadia -i in -c css -o [Options...]",
4545
Argv: func() interface{} { return new(rootT) },
@@ -74,7 +74,7 @@ var root = &cli.Command{
7474
// var (
7575
// progname = "cascadiaC"
7676
// version = "0.1.0"
77-
// date = "2023-01-08"
77+
// date = "2023-06-29"
7878

7979
// rootArgv *rootT
8080
// // Opts store all the configurable options

cascadia_main.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ func Cascadia(bi io.Reader, bw io.Writer, Opts OptsT) error {
172172
fmt.Fprintf(bw, deli)
173173
case PieceStyleATTR:
174174
fmt.Fprintf(bw, "%s%s",
175-
item.Find(piece.Values[key]).AttrOr(piece.AttrName[key], ""), deli)
175+
item.AttrOr(piece.Values[key], ""), deli)
176176
case PieceStyleTEXT:
177177
fmt.Fprintf(bw, "%s%s",
178178
item.Find(piece.Values[key]).Contents().Text(), deli)

piece_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,10 @@ import (
77
func TestPieceAttr(t *testing.T) {
88
testCases(t, "Piece Attributes", []testCase{
99
{"id",
10-
[]string{"-i", "opt_piece_attr.html", "-o", "-c", "li", "-p", "url=attr[id]:a"},
10+
[]string{"-i", "opt_piece_attr.html", "-o", "-c", "li > a", "-p", "url=ATTR:id"},
1111
},
1212
{"href",
13-
[]string{"-i", "opt_piece_attr.html", "-o", "-c", "li", "-p", "url=attr[href]:a"},
13+
[]string{"-i", "opt_piece_attr.html", "-o", "-c", "li > a", "-p", "url=ATTR:href"},
1414
},
1515
//{"both_fields",},
1616
//{"noexist"},

prop_piece.go

Lines changed: 17 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -9,22 +9,25 @@ package main
99
import (
1010
"errors"
1111
"regexp"
12-
"strings"
1312
)
1413

1514
type PieceStyle int
1615

1716
const (
18-
PieceStyleRAW PieceStyle = iota
17+
PieceStyleTEXT PieceStyle = iota
18+
PieceStyleRAW
1919
PieceStyleATTR
20-
PieceStyleTEXT
2120
)
2221

2322
type PieceStyleMap struct {
2423
Keys []string
2524
Values map[string]string
2625
PieceStyles map[string]PieceStyle
27-
AttrName map[string]string
26+
}
27+
28+
var pieceStyles = map[string]PieceStyle{
29+
"RAW": PieceStyleRAW,
30+
"ATTR": PieceStyleATTR,
2831
}
2932

3033
//==========================================================================
@@ -39,30 +42,22 @@ func (m *PieceStyleMap) Decode(s string) error {
3942
if (m.Values) == nil {
4043
m.Values = make(map[string]string)
4144
m.PieceStyles = make(map[string]PieceStyle)
42-
m.AttrName = make(map[string]string)
4345
}
44-
matches := regexp.MustCompile("(.*)=(.*)").FindStringSubmatch(s)
45-
if len(matches) < 2 {
46+
matches := regexp.MustCompile("(.*)=((.*?):)?(.*)").FindStringSubmatch(s)
47+
if len(matches) < 3 {
4648
return errors.New("format error. To get help, run: " + progname)
4749
}
4850
key := matches[1]
49-
val := matches[2]
50-
index := strings.Index(val, ":")
51-
if index > 0 {
52-
style := val[:index]
53-
val = val[index+1:]
54-
if style == IsRaw {
55-
m.PieceStyles[key] = PieceStyleRAW
56-
} else if strings.HasPrefix(style, "attr[") && strings.HasSuffix(style, "]") {
57-
m.PieceStyles[key] = PieceStyleATTR
58-
m.AttrName[key] = style[5 : len(style)-1]
59-
} else {
60-
m.PieceStyles[key] = PieceStyleTEXT
61-
}
62-
} else {
63-
m.PieceStyles[key] = PieceStyleTEXT
51+
ptp := matches[3] // piece type
52+
val := matches[4]
53+
style := PieceStyle(0)
54+
style, ok := pieceStyles[ptp]
55+
//fmt.Println("]", key, ptp, style, ok, val)
56+
if len(ptp) != 0 && !ok {
57+
return errors.New("Piece style specification error. To get help, run: " + progname)
6458
}
6559
m.Keys = append(m.Keys, key)
60+
m.PieceStyles[key] = style
6661
m.Values[key] = val
6762
return nil
6863
}

0 commit comments

Comments
 (0)