Skip to content

Commit 0cdfaa9

Browse files
authored
improve NamedFile and embed file detect text content type (#1210)
1 parent 8aed167 commit 0cdfaa9

File tree

10 files changed

+89
-26
lines changed

10 files changed

+89
-26
lines changed

Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,10 @@ bytes = "1"
5252
bcrypt = "0.17"
5353
cookie = "0.18"
5454
chacha20poly1305 = "0.10"
55+
chardetng = "0.1"
5556
chrono = "0.4"
5657
compact_str = { version = "0.9", features = ["serde"] }
58+
content_inspector = "0.2"
5759
encoding_rs = "0.8"
5860
email_address = "0.2"
5961
enumflags2 = "0.7"

crates/core/Cargo.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ anyhow = { workspace = true, optional = true }
4444
async-trait = { workspace = true }
4545
base64 = { workspace = true }
4646
bytes = { workspace = true }
47+
chardetng = { workspace = true }
48+
content_inspector = { workspace = true }
4749
cookie = { workspace = true, features = ["percent-encode", "private", "signed"], optional = true }
4850
encoding_rs = { workspace = true, optional = true }
4951
enumflags2 = { workspace = true }

crates/core/src/fs/named_file.rs

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,13 @@ use std::os::unix::fs::MetadataExt;
1111
use enumflags2::{BitFlags, bitflags};
1212
use headers::*;
1313
use tokio::fs::File;
14+
use tokio::io::AsyncReadExt;
1415

1516
use super::{ChunkedFile, ChunkedState};
1617
use crate::http::header::{
1718
CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_TYPE, IF_NONE_MATCH, RANGE,
1819
};
19-
use crate::http::{HttpRange, Mime, Request, Response, StatusCode, StatusError};
20+
use crate::http::{HttpRange, Mime, Request, Response, StatusCode, StatusError, detect_text_mime};
2021
use crate::{Depot, Error, Result, Writer, async_trait};
2122

2223
const CHUNK_SIZE: u64 = 1024 * 1024;
@@ -171,21 +172,30 @@ impl NamedFileBuilder {
171172
} = self;
172173

173174
let file = File::open(&path).await?;
174-
let content_type = content_type.unwrap_or_else(|| {
175-
let ct = mime_infer::from_path(&path).first_or_octet_stream();
176-
let ftype = ct.type_();
177-
let stype = ct.subtype();
178-
if (ftype == mime::TEXT || stype == mime::JSON || stype == mime::JAVASCRIPT)
179-
&& ct.get_param(mime::CHARSET).is_none()
180-
{
181-
//TODO: auto detect charset
182-
format!("{ct}; charset=utf-8")
183-
.parse::<mime::Mime>()
184-
.unwrap_or(ct)
175+
let content_type =
176+
if let Some(mime) = content_type.or_else(|| mime_infer::from_path(&path).first()) {
177+
if mime == mime::TEXT_PLAIN {
178+
let mut buffer: Vec<u8> = vec![];
179+
let _ = file.take(1024).read(&mut buffer).await;
180+
if let Some(mime) = detect_text_mime(&buffer) {
181+
mime
182+
} else {
183+
mime
184+
}
185+
} else {
186+
mime
187+
}
185188
} else {
186-
ct
187-
}
188-
});
189+
let mut buffer: Vec<u8> = vec![];
190+
let _ = file.take(1024).read(&mut buffer).await;
191+
if let Some(mime) = detect_text_mime(&buffer) {
192+
mime
193+
} else {
194+
mime::APPLICATION_OCTET_STREAM
195+
}
196+
};
197+
198+
let file = File::open(&path).await?;
189199
let metadata = file.metadata().await?;
190200
let modified = metadata.modified().ok();
191201
let content_encoding = match content_encoding {

crates/core/src/http/mod.rs

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,28 @@ pub fn guess_accept_mime(req: &Request, default_type: Option<Mime>) -> Mime {
6363
.unwrap_or(dmime)
6464
}
6565

66+
#[doc(hidden)]
67+
#[inline]
68+
#[must_use]
69+
pub fn detect_text_mime(buffer: &[u8]) -> Option<mime::Mime> {
70+
let info = content_inspector::inspect(buffer);
71+
if info.is_text() {
72+
let mut detector = chardetng::EncodingDetector::new();
73+
detector.feed(buffer, buffer.len() < 1024);
74+
75+
let (encoding, _) = detector.guess_assess(None, true);
76+
if encoding.name().eq_ignore_ascii_case("utf-8") {
77+
Some(mime::TEXT_PLAIN_UTF_8)
78+
} else {
79+
format!("text/plain; charset={}", encoding.name())
80+
.parse::<mime::Mime>()
81+
.ok()
82+
}
83+
} else {
84+
None
85+
}
86+
}
87+
6688
#[cfg(test)]
6789
mod tests {
6890
use super::header::*;

crates/serve-static/src/dir.rs

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -447,10 +447,7 @@ impl Handler for StaticDir {
447447
};
448448

449449
let builder = {
450-
let mut builder = NamedFile::builder(named_path).content_type(
451-
mime_infer::from_ext(ext.as_deref().unwrap_or_default())
452-
.first_or_octet_stream(),
453-
);
450+
let mut builder = NamedFile::builder(named_path);
454451
if let Some(content_encoding) = content_encoding {
455452
builder = builder.content_encoding(content_encoding);
456453
}

crates/serve-static/src/embed.rs

Lines changed: 22 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@ use salvo_core::http::header::{
88
ACCEPT_RANGES, CONTENT_LENGTH, CONTENT_TYPE, ETAG, IF_NONE_MATCH, RANGE,
99
};
1010
use salvo_core::http::headers::{ContentLength, ContentRange, HeaderMapExt};
11-
use salvo_core::http::{HeaderValue, HttpRange, Mime, Request, Response, StatusCode};
11+
use salvo_core::http::{
12+
HeaderValue, HttpRange, Mime, Request, Response, StatusCode, detect_text_mime,
13+
};
1214
use salvo_core::{Depot, FlowCtrl, IntoVecString, async_trait};
1315

1416
use super::{decode_url_path_safely, format_url_path_safely, join_path, redirect_to_dir_url};
@@ -63,14 +65,29 @@ fn render_embedded_data(
6365
metadata: &Metadata,
6466
req: &Request,
6567
res: &mut Response,
66-
mime_override: Option<Mime>,
68+
mime: Option<Mime>,
6769
) {
6870
// Determine Content-Type once
69-
let effective_mime = mime_override
70-
.unwrap_or_else(|| mime_infer::from_path(req.uri().path()).first_or_octet_stream());
71+
let content_type =
72+
if let Some(mime) = mime.or_else(|| mime_infer::from_path(req.uri().path()).first()) {
73+
if mime == mime::TEXT_PLAIN {
74+
if let Some(mime) = detect_text_mime(&data) {
75+
mime
76+
} else {
77+
mime
78+
}
79+
} else {
80+
mime
81+
}
82+
} else if let Some(mime) = detect_text_mime(&data) {
83+
mime
84+
} else {
85+
mime::APPLICATION_OCTET_STREAM
86+
};
87+
7188
res.headers_mut().insert(
7289
CONTENT_TYPE,
73-
effective_mime
90+
content_type
7491
.as_ref()
7592
.parse()
7693
.unwrap_or_else(|_| HeaderValue::from_static("application/octet-stream")),

examples/static-dir-list/static/boy/work_ansi.txt

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,10 @@ Let me take it down
22

33
An elephant said to a mouse ,"no doubt that you are the smallest znd most useless thing that Ihave e ver seen ."
44

5-
"Pless ,say it again .Let me take it down ."the mouse said ."I will tell a flea what I know.
5+
"Pless ,say it again .Let me take it down ."the mouse said ."I will tell a flea what I know.
6+
7+
���Ұ���������
8+
9+
һͷ�����һֻ����˵�����������ʣ������Ҽ�������С����û�õĶ�������
10+
11+
������˵һ�顣���Ұ�����������������˵�����һ����֪���ĸ������顣��
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
你好朋友
2+
abc
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,8 @@
11
<html>
22
<body>
33
Index page
4+
5+
<p><a href="test1.txt">test1.txt</a></p>
6+
<p><a href="test2.txt">test2.txt</a></p>
47
</body>
58
</html>
Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,3 @@
1-
copy1
1+
copy1
2+
3+
你好朋友

0 commit comments

Comments
 (0)