Skip to content

Commit efb1759

Browse files
authored
Detect correct mime for all text type (#1211)
1 parent 0cdfaa9 commit efb1759

File tree

9 files changed

+119
-84
lines changed

9 files changed

+119
-84
lines changed

crates/core/src/catcher.rs

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -44,7 +44,8 @@ use mime::Mime;
4444
use serde::Serialize;
4545

4646
use crate::handler::{Handler, WhenHoop};
47-
use crate::http::{Request, ResBody, Response, StatusCode, StatusError, guess_accept_mime, header};
47+
use crate::http::mime::guess_accept_mime;
48+
use crate::http::{Request, ResBody, Response, StatusCode, StatusError, header};
4849
use crate::{Depot, FlowCtrl};
4950

5051
static SUPPORTED_FORMATS: LazyLock<Vec<mime::Name>> =

crates/core/src/fs/mod.rs

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -131,7 +131,10 @@ mod test {
131131
.await
132132
.unwrap();
133133
assert_eq!(file.path(), Path::new(src));
134-
assert_eq!(file.content_type(), &Mime::from_str("text/html").unwrap());
134+
assert_eq!(
135+
file.content_type(),
136+
&Mime::from_str("text/html; charset=utf8").unwrap()
137+
);
135138
assert_eq!(
136139
file.content_disposition(),
137140
Some(&HeaderValue::from_static(

crates/core/src/fs/named_file.rs

Lines changed: 7 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -10,14 +10,16 @@ use std::os::unix::fs::MetadataExt;
1010

1111
use enumflags2::{BitFlags, bitflags};
1212
use headers::*;
13+
use mime::Mime;
1314
use tokio::fs::File;
1415
use tokio::io::AsyncReadExt;
1516

1617
use super::{ChunkedFile, ChunkedState};
1718
use crate::http::header::{
1819
CONTENT_DISPOSITION, CONTENT_ENCODING, CONTENT_TYPE, IF_NONE_MATCH, RANGE,
1920
};
20-
use crate::http::{HttpRange, Mime, Request, Response, StatusCode, StatusError, detect_text_mime};
21+
use crate::http::mime::{detect_text_mime, fill_mime_charset_if_need, is_charset_required_mime};
22+
use crate::http::{HttpRange, Request, Response, StatusCode, StatusError};
2123
use crate::{Depot, Error, Result, Writer, async_trait};
2224

2325
const CHUNK_SIZE: u64 = 1024 * 1024;
@@ -173,18 +175,13 @@ impl NamedFileBuilder {
173175

174176
let file = File::open(&path).await?;
175177
let content_type =
176-
if let Some(mime) = content_type.or_else(|| mime_infer::from_path(&path).first()) {
177-
if mime == mime::TEXT_PLAIN {
178+
if let Some(mut mime) = content_type.or_else(|| mime_infer::from_path(&path).first()) {
179+
if is_charset_required_mime(&mime) {
178180
let mut buffer: Vec<u8> = vec![];
179181
let _ = file.take(1024).read(&mut buffer).await;
180-
if let Some(mime) = detect_text_mime(&buffer) {
181-
mime
182-
} else {
183-
mime
184-
}
185-
} else {
186-
mime
182+
fill_mime_charset_if_need(&mut mime, &buffer);
187183
}
184+
mime
188185
} else {
189186
let mut buffer: Vec<u8> = vec![];
190187
let _ = file.take(1024).read(&mut buffer).await;

crates/core/src/http/mime.rs

Lines changed: 91 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,91 @@
1+
//! Reexport of the `mime` crate and some mime related utilities.
2+
3+
pub use mime::*;
4+
5+
use crate::http::Request;
6+
7+
#[doc(hidden)]
8+
#[inline]
9+
pub fn guess_accept_mime(req: &Request, default_type: Option<Mime>) -> Mime {
10+
let dmime: Mime = default_type.unwrap_or(mime::TEXT_HTML);
11+
let accept = req.accept();
12+
accept
13+
.first()
14+
.unwrap_or(&dmime)
15+
.to_string()
16+
.parse()
17+
.unwrap_or(dmime)
18+
}
19+
20+
#[doc(hidden)]
21+
#[inline]
22+
#[must_use]
23+
pub fn detect_text_mime(buffer: &[u8]) -> Option<Mime> {
24+
let info = content_inspector::inspect(buffer);
25+
if info.is_text() {
26+
if let Some(charset) = detect_text_charset(buffer) {
27+
if charset.eq_ignore_ascii_case("utf-8") {
28+
Some(mime::TEXT_PLAIN_UTF_8)
29+
} else {
30+
format!("text/plain; charset={charset}")
31+
.parse::<Mime>()
32+
.ok()
33+
}
34+
} else {
35+
Some(mime::TEXT_PLAIN_UTF_8)
36+
}
37+
} else {
38+
None
39+
}
40+
}
41+
42+
#[doc(hidden)]
43+
#[inline]
44+
#[must_use]
45+
pub fn detect_text_charset(buffer: &[u8]) -> Option<String> {
46+
let mut detector = chardetng::EncodingDetector::new();
47+
detector.feed(buffer, buffer.len() < 1024);
48+
49+
let (encoding, _) = detector.guess_assess(None, true);
50+
if encoding.name().eq_ignore_ascii_case("utf-8") {
51+
Some("utf-8".into())
52+
} else {
53+
Some(encoding.name().into())
54+
}
55+
}
56+
57+
#[doc(hidden)]
58+
#[inline]
59+
#[must_use]
60+
pub fn is_charset_required_mime(mime: &Mime) -> bool {
61+
matches!(mime.subtype(), mime::JAVASCRIPT | mime::XML | mime::JSON)
62+
|| matches!(mime.type_(), mime::TEXT)
63+
}
64+
65+
#[doc(hidden)]
66+
#[inline]
67+
pub fn fill_mime_charset_if_need(mime: &mut Mime, buffer: &[u8]) {
68+
if !is_charset_required_mime(mime) || mime.get_param("charset").is_some() {
69+
return;
70+
}
71+
if let Some(charset) = detect_text_charset(buffer) {
72+
if let Ok(new_mime) = format!("{mime}; charset={charset}").parse::<Mime>() {
73+
*mime = new_mime;
74+
}
75+
}
76+
}
77+
78+
#[cfg(test)]
79+
mod tests {
80+
use super::*;
81+
use crate::http::header::*;
82+
83+
#[test]
84+
fn test_guess_accept_mime() {
85+
let mut req = Request::default();
86+
let headers = req.headers_mut();
87+
headers.insert(ACCEPT, HeaderValue::from_static("application/javascript"));
88+
let mime = guess_accept_mime(&req, None);
89+
assert_eq!(mime, "application/javascript".parse::<Mime>().unwrap());
90+
}
91+
}

crates/core/src/http/mod.rs

Lines changed: 3 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -2,22 +2,24 @@
22
33
pub mod errors;
44
pub mod form;
5+
pub mod mime;
56
mod range;
67
pub mod request;
78
pub mod response;
89
cfg_feature! {
910
#![feature = "cookie"]
1011
pub use cookie;
1112
}
13+
1214
pub use errors::{ParseError, ParseResult, StatusError, StatusResult};
1315
pub use headers;
1416
pub use http::method::Method;
1517
pub use http::{HeaderMap, HeaderName, HeaderValue, StatusCode, header, method, uri};
16-
pub use mime::{self, Mime};
1718
pub use range::HttpRange;
1819
pub use request::Request;
1920
pub mod body;
2021
pub use body::{Body, ReqBody, ResBody};
22+
pub use mime::Mime;
2123
pub use response::Response;
2224

2325
pub use http::version::Version;
@@ -49,53 +51,3 @@ pub fn parse_accept_encoding(header: &str) -> Vec<(String, u8)> {
4951

5052
vec
5153
}
52-
53-
#[doc(hidden)]
54-
#[inline]
55-
pub fn guess_accept_mime(req: &Request, default_type: Option<Mime>) -> Mime {
56-
let dmime: Mime = default_type.unwrap_or(mime::TEXT_HTML);
57-
let accept = req.accept();
58-
accept
59-
.first()
60-
.unwrap_or(&dmime)
61-
.to_string()
62-
.parse()
63-
.unwrap_or(dmime)
64-
}
65-
66-
#[doc(hidden)]
67-
#[inline]
68-
#[must_use]
69-
pub fn detect_text_mime(buffer: &[u8]) -> Option<mime::Mime> {
70-
let info = content_inspector::inspect(buffer);
71-
if info.is_text() {
72-
let mut detector = chardetng::EncodingDetector::new();
73-
detector.feed(buffer, buffer.len() < 1024);
74-
75-
let (encoding, _) = detector.guess_assess(None, true);
76-
if encoding.name().eq_ignore_ascii_case("utf-8") {
77-
Some(mime::TEXT_PLAIN_UTF_8)
78-
} else {
79-
format!("text/plain; charset={}", encoding.name())
80-
.parse::<mime::Mime>()
81-
.ok()
82-
}
83-
} else {
84-
None
85-
}
86-
}
87-
88-
#[cfg(test)]
89-
mod tests {
90-
use super::header::*;
91-
use super::*;
92-
93-
#[test]
94-
fn test_guess_accept_mime() {
95-
let mut req = Request::default();
96-
let headers = req.headers_mut();
97-
headers.insert(ACCEPT, HeaderValue::from_static("application/javascript"));
98-
let mime = guess_accept_mime(&req, None);
99-
assert_eq!(mime, "application/javascript".parse::<Mime>().unwrap());
100-
}
101-
}

crates/proxy/src/hyper_client.rs

Lines changed: 3 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -116,17 +116,16 @@ mod tests {
116116
#[tokio::test]
117117
async fn test_hyper_client() {
118118
let router = Router::new().push(
119-
Router::with_path("rust/{**rest}").goal(Proxy::new(vec!["https://www.rust-lang.org"], HyperClient::default())),
119+
Router::with_path("rust/{**rest}").goal(Proxy::new(vec!["https://salvo.rs"], HyperClient::default())),
120120
);
121121

122-
let content = TestClient::get("http://127.0.0.1:5801/rust/tools/install")
122+
let content = TestClient::get("http://127.0.0.1:5801/rust/guide/index.html")
123123
.send(router)
124124
.await
125125
.take_string()
126126
.await
127127
.unwrap();
128-
println!("{content}");
129-
assert!(content.contains("Install Rust"));
128+
assert!(content.contains("Salvo"));
130129
}
131130

132131
#[test]

crates/proxy/src/reqwest_client.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -122,16 +122,16 @@ mod tests {
122122
#[tokio::test]
123123
async fn test_reqwest_client() {
124124
let router = Router::new().push(
125-
Router::with_path("rust/{**rest}").goal(Proxy::new(vec!["https://www.rust-lang.org"], ReqwestClient::default())),
125+
Router::with_path("rust/{**rest}").goal(Proxy::new(vec!["https://salvo.rs"], ReqwestClient::default())),
126126
);
127127

128-
let content = TestClient::get("http://127.0.0.1:5801/rust/tools/install")
128+
let content = TestClient::get("http://127.0.0.1:5801/rust/guide/index.html")
129129
.send(router)
130130
.await
131131
.take_string()
132132
.await
133133
.unwrap();
134-
assert!(content.contains("Install Rust"));
134+
assert!(content.contains("Salvo"));
135135
}
136136

137137
#[test]

crates/serve-static/src/embed.rs

Lines changed: 5 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -8,9 +8,8 @@ use salvo_core::http::header::{
88
ACCEPT_RANGES, CONTENT_LENGTH, CONTENT_TYPE, ETAG, IF_NONE_MATCH, RANGE,
99
};
1010
use salvo_core::http::headers::{ContentLength, ContentRange, HeaderMapExt};
11-
use salvo_core::http::{
12-
HeaderValue, HttpRange, Mime, Request, Response, StatusCode, detect_text_mime,
13-
};
11+
use salvo_core::http::mime::{fill_mime_charset_if_need, detect_text_mime};
12+
use salvo_core::http::{HeaderValue, HttpRange, Mime, Request, Response, StatusCode};
1413
use salvo_core::{Depot, FlowCtrl, IntoVecString, async_trait};
1514

1615
use super::{decode_url_path_safely, format_url_path_safely, join_path, redirect_to_dir_url};
@@ -69,16 +68,9 @@ fn render_embedded_data(
6968
) {
7069
// Determine Content-Type once
7170
let content_type =
72-
if let Some(mime) = mime.or_else(|| mime_infer::from_path(req.uri().path()).first()) {
73-
if mime == mime::TEXT_PLAIN {
74-
if let Some(mime) = detect_text_mime(&data) {
75-
mime
76-
} else {
77-
mime
78-
}
79-
} else {
80-
mime
81-
}
71+
if let Some(mut mime) = mime.or_else(|| mime_infer::from_path(req.uri().path()).first()) {
72+
fill_mime_charset_if_need(&mut mime, &data);
73+
mime
8274
} else if let Some(mime) = detect_text_mime(&data) {
8375
mime
8476
} else {

examples/proxy-simple/src/main.rs

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -12,7 +12,7 @@ async fn main() {
1212
Router::new()
1313
.host("127.0.0.1")
1414
.path("{**rest}")
15-
.goal(Proxy::use_hyper_client("https://www.rust-lang.org")),
15+
.goal(Proxy::use_hyper_client("https://docs.rs")),
1616
)
1717
.push(
1818
Router::new()

0 commit comments

Comments
 (0)