Skip to content

Commit af62a13

Browse files
committed
various improvements
1 parent 11b8661 commit af62a13

File tree

9 files changed

+359
-129
lines changed

9 files changed

+359
-129
lines changed

Cargo.lock

Lines changed: 179 additions & 54 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -26,20 +26,21 @@ license = "CC0-1.0"
2626
atty = "0.2.14" # Used for highlighting network errors
2727
base64 = "0.22.1" # Used for integrity attributes
2828
chrono = "0.4.40" # Used for formatting timestamps
29-
clap = { version = "4.5.34", features = ["derive"], optional = true } # Used for processing CLI arguments
29+
clap = { version = "4.5.35", features = ["derive"], optional = true } # Used for processing CLI arguments
3030
cssparser = "0.35.0" # Used for dealing with CSS
31+
directories = { version = "6.0.0", optional = true } # Used for GUI
3132
druid = { version = "0.8.3", optional = true } # Used for GUI
3233
encoding_rs = "0.8.35" # Used for parsing and converting document charsets
33-
html5ever = "0.28.0" # Used for all things DOM
34-
markup5ever_rcdom = "=0.4.0-unofficial" # Used for manipulating DOM
34+
html5ever = "0.29.1" # Used for all things DOM
35+
markup5ever_rcdom = "=0.5.0-unofficial" # Used for manipulating DOM
3536
percent-encoding = "2.3.1" # Used for encoding URLs
3637
sha2 = "0.10.8" # Used for calculating checksums during integrity checks
3738
redb = "2.4.0" # Used for on-disk caching of remote assets
3839
tempfile = { version = "3.19.1", optional = true } # Used for on-disk caching of remote assets
3940
url = "2.5.4" # Used for parsing URLs
4041
openssl = "0.10.72" # Used for static linking of the OpenSSL library
4142

42-
# Used for parsing srcset and NOSCRIPT
43+
# Used for unwrapping NOSCRIPT
4344
[dependencies.regex]
4445
version = "1.11.1"
4546
default-features = false
@@ -57,7 +58,7 @@ assert_cmd = "2.0.16"
5758
[features]
5859
default = ["cli", "vendored-openssl"]
5960
cli = ["clap", "tempfile"] # Build a CLI tool that includes main() function
60-
gui = ["druid", "tempfile"] # Build a GUI executable that includes main() function
61+
gui = ["directories", "druid", "tempfile"] # Build a GUI executable that includes main() function
6162
vendored-openssl = ["openssl/vendored"] # Compile and statically link a copy of OpenSSL
6263

6364
[lib]

Makefile

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ install:
2929

3030
lint:
3131
@cargo clippy --fix --allow-dirty --allow-staged
32+
# @cargo fix --allow-dirty --allow-staged
3233
.PHONY: lint
3334

3435
lint_check:

README.md

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,6 @@
44
[![Monolith Actor on Apify](https://apify.com/actor-badge?actor=netmilk/monolith)](https://apify.com/netmilk/monolith?fpr=monolith)
55

66

7-
8-
97
```
108
_____ _____________ __________ ___________________ ___
119
| \ / \ | | | | | |
@@ -225,6 +223,7 @@ monolith -I -d example.com -d www.example.com https://example.com -o example-onl
225223
monolith -I -B -d .googleusercontent.com -d googleanalytics.com -d .google.com https://example.com -o example-no-ads.html
226224
```
227225

226+
228227
---------------------------------------------------
229228

230229

@@ -242,6 +241,16 @@ chromium --headless --window-size=1920,1080 --run-all-compositor-stages-before-d
242241
---------------------------------------------------
243242

244243

244+
## Authentication
245+
246+
```console
247+
monolith https://username:password@example.com -o example-basic-auth.html
248+
```
249+
250+
251+
---------------------------------------------------
252+
253+
245254
## Proxies
246255

247256
Please set `https_proxy`, `http_proxy`, and `no_proxy` environment variables.

src/core.rs

Lines changed: 25 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -266,10 +266,9 @@ pub fn create_monolithic_document_from_data(
266266
}
267267

268268
// Append noindex META-tag
269-
if let meta_robots_content_value = get_robots(&dom.document).unwrap_or_default() {
270-
if meta_robots_content_value.trim().is_empty() || meta_robots_content_value != "none" {
271-
dom = set_robots(dom, "none");
272-
}
269+
let meta_robots_content_value = get_robots(&dom.document).unwrap_or_default();
270+
if meta_robots_content_value.trim().is_empty() || meta_robots_content_value != "none" {
271+
dom = set_robots(dom, "none");
273272
}
274273

275274
// Save using specified charset, if given
@@ -523,11 +522,14 @@ pub fn domain_is_within_domain(domain: &str, domain_to_match_against: &str) -> b
523522
ok
524523
}
525524

526-
pub fn format_output_path(destination: &str, document_title: &str) -> String {
525+
pub fn format_output_path(
526+
path: &str,
527+
document_title: &str,
528+
output_format: MonolithOutputFormat,
529+
) -> String {
527530
let datetime: &str = &Utc::now().to_rfc3339_opts(SecondsFormat::Secs, true);
528531

529-
destination
530-
.replace("%timestamp%", &datetime.replace(':', "_"))
532+
path.replace("%timestamp%", &datetime.replace(':', "_"))
531533
.replace(
532534
"%title%",
533535
document_title
@@ -541,13 +543,22 @@ pub fn format_output_path(destination: &str, document_title: &str) -> String {
541543
.replace('?', "")
542544
.trim_start_matches('.'),
543545
)
544-
.to_string()
545-
.replace('<', "[")
546-
.replace('>', "]")
547-
.replace(':', " - ")
548-
.replace('\"', "")
549-
.replace('|', "-")
550-
.replace('?', "")
546+
.replace(
547+
"%ext%",
548+
if output_format == MonolithOutputFormat::HTML {
549+
"html"
550+
} else {
551+
""
552+
},
553+
)
554+
.replace(
555+
"%extension%",
556+
if output_format == MonolithOutputFormat::HTML {
557+
"html"
558+
} else {
559+
""
560+
},
561+
)
551562
.to_string()
552563
}
553564

src/css.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -229,8 +229,10 @@ pub fn process_css<'a>(
229229
result.push_str(format_quoted_string(EMPTY_IMAGE_DATA_URL).as_str());
230230
} else {
231231
let resolved_url: Url = resolve_url(document_url, value);
232+
232233
match retrieve_asset(cache, client, document_url, &resolved_url, options) {
233234
Ok((data, final_url, media_type, charset)) => {
235+
// TODO: if it's @font-face, exclude definitions of non-woff/woff-2 fonts (if woff/woff-2 are present)
234236
let mut data_url =
235237
create_data_url(&media_type, &charset, &data, &final_url);
236238
data_url.set_fragment(resolved_url.fragment());

0 commit comments

Comments
 (0)