Skip to content

Commit d565d2f

Browse files
committed
Unify diagnostic generation for utf8 failure
1 parent 0b0d931 commit d565d2f

File tree

2 files changed

+50
-47
lines changed

2 files changed

+50
-47
lines changed

compiler/rustc_builtin_macros/src/source_util.rs

Lines changed: 2 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ use rustc_expand::base::{
1313
use rustc_expand::module::DirOwnership;
1414
use rustc_lint_defs::BuiltinLintDiag;
1515
use rustc_parse::parser::{ForceCollect, Parser};
16-
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal};
16+
use rustc_parse::{new_parser_from_file, unwrap_or_emit_fatal, utf8_error};
1717
use rustc_session::lint::builtin::INCOMPLETE_INCLUDE;
1818
use rustc_span::source_map::SourceMap;
1919
use rustc_span::{Pos, Span, Symbol};
@@ -211,25 +211,7 @@ pub(crate) fn expand_include_str(
211211
}
212212
Err(utf8err) => {
213213
let mut err = cx.dcx().struct_span_err(sp, format!("`{path}` wasn't a utf-8 file"));
214-
let path = PathBuf::from(path.as_str());
215-
let start = utf8err.valid_up_to();
216-
let note = format!("invalid utf-8 at byte `{start}`");
217-
let msg = if let Some(len) = utf8err.error_len() {
218-
format!("`{:?}` is not valid utf-8", &bytes[start..start + len])
219-
} else {
220-
note.clone()
221-
};
222-
let contents = String::from_utf8_lossy(&bytes[..]).to_string();
223-
let source = cx.source_map().new_source_file(path.into(), contents);
224-
let span = Span::with_root_ctxt(
225-
source.normalized_byte_pos(start as u32),
226-
source.normalized_byte_pos(start as u32),
227-
);
228-
if span.is_dummy() {
229-
err.note(note);
230-
} else {
231-
err.span_note(span, msg);
232-
}
214+
utf8_error(cx.source_map(), path.as_str(), None, &mut err, utf8err, &bytes[..]);
233215
DummyResult::any(sp, err.emit())
234216
}
235217
},

compiler/rustc_parse/src/lib.rs

Lines changed: 48 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -15,15 +15,17 @@
1515
#![warn(unreachable_pub)]
1616
// tidy-alphabetical-end
1717

18-
use std::path::Path;
18+
use std::path::{Path, PathBuf};
19+
use std::str::Utf8Error;
1920

2021
use rustc_ast as ast;
2122
use rustc_ast::tokenstream::TokenStream;
2223
use rustc_ast::{AttrItem, Attribute, MetaItemInner, token};
2324
use rustc_ast_pretty::pprust;
2425
use rustc_data_structures::sync::Lrc;
25-
use rustc_errors::{Diag, FatalError, PResult};
26+
use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult};
2627
use rustc_session::parse::ParseSess;
28+
use rustc_span::source_map::SourceMap;
2729
use rustc_span::{FileName, SourceFile, Span};
2830
pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;
2931

@@ -74,36 +76,21 @@ pub fn new_parser_from_file<'a>(
7476
path: &Path,
7577
sp: Option<Span>,
7678
) -> Result<Parser<'a>, Vec<Diag<'a>>> {
77-
let source_file = psess.source_map().load_file(path).unwrap_or_else(|e| {
79+
let sm = psess.source_map();
80+
let source_file = sm.load_file(path).unwrap_or_else(|e| {
7881
let msg = format!("couldn't read `{}`: {}", path.display(), e);
7982
let mut err = psess.dcx().struct_fatal(msg);
8083
if let Ok(contents) = std::fs::read(path)
81-
&& let Err(utf8err) = String::from_utf8(contents)
84+
&& let Err(utf8err) = String::from_utf8(contents.clone())
8285
{
83-
// The file exists, but it wasn't valid UTF-8.
84-
let start = utf8err.utf8_error().valid_up_to();
85-
let note = format!("invalid utf-8 at byte `{start}`");
86-
let msg = if let Some(len) = utf8err.utf8_error().error_len() {
87-
format!("`{:?}` is not valid utf-8", &utf8err.as_bytes()[start..start + len])
88-
} else {
89-
note.clone()
90-
};
91-
let contents = utf8err.into_utf8_lossy();
92-
let source = psess.source_map().new_source_file(path.to_owned().into(), contents);
93-
let span = Span::with_root_ctxt(
94-
source.normalized_byte_pos(start as u32),
95-
source.normalized_byte_pos(start as u32),
86+
utf8_error(
87+
sm,
88+
&path.display().to_string(),
89+
sp,
90+
&mut err,
91+
utf8err.utf8_error(),
92+
&contents,
9693
);
97-
if span.is_dummy() {
98-
err.note(note);
99-
} else {
100-
if sp.is_some() {
101-
err.span_note(span, msg);
102-
} else {
103-
err.span(span);
104-
err.span_label(span, msg);
105-
}
106-
}
10794
}
10895
if let Some(sp) = sp {
10996
err.span(sp);
@@ -113,6 +100,40 @@ pub fn new_parser_from_file<'a>(
113100
new_parser_from_source_file(psess, source_file)
114101
}
115102

103+
pub fn utf8_error<E: EmissionGuarantee>(
104+
sm: &SourceMap,
105+
path: &str,
106+
sp: Option<Span>,
107+
err: &mut Diag<'_, E>,
108+
utf8err: Utf8Error,
109+
contents: &[u8],
110+
) {
111+
// The file exists, but it wasn't valid UTF-8.
112+
let start = utf8err.valid_up_to();
113+
let note = format!("invalid utf-8 at byte `{start}`");
114+
let msg = if let Some(len) = utf8err.error_len() {
115+
format!("`{:?}` is not valid utf-8", &contents[start..start + len])
116+
} else {
117+
note.clone()
118+
};
119+
let contents = String::from_utf8_lossy(contents).to_string();
120+
let source = sm.new_source_file(PathBuf::from(path).into(), contents);
121+
let span = Span::with_root_ctxt(
122+
source.normalized_byte_pos(start as u32),
123+
source.normalized_byte_pos(start as u32),
124+
);
125+
if span.is_dummy() {
126+
err.note(note);
127+
} else {
128+
if sp.is_some() {
129+
err.span_note(span, msg);
130+
} else {
131+
err.span(span);
132+
err.span_label(span, msg);
133+
}
134+
}
135+
}
136+
116137
/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
117138
/// the initial token stream.
118139
fn new_parser_from_source_file(

0 commit comments

Comments
 (0)