Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 47 additions & 14 deletions compiler/rustc_parse/src/lexer/unescape_error_reporting.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
use std::iter::once;
use std::ops::Range;

use rustc_errors::{Applicability, Handler};
use rustc_errors::{pluralize, Applicability, Handler};
use rustc_lexer::unescape::{EscapeError, Mode};
use rustc_span::{BytePos, Span};

Expand Down Expand Up @@ -49,24 +49,57 @@ pub(crate) fn emit_unescape_error(
.emit();
}
EscapeError::MoreThanOneChar => {
let (prefix, msg) = if mode.is_bytes() {
("b", "if you meant to write a byte string literal, use double quotes")
} else {
("", "if you meant to write a `str` literal, use double quotes")
};
use unicode_normalization::{char::is_combining_mark, UnicodeNormalization};

handler
.struct_span_err(
span_with_quotes,
"character literal may only contain one codepoint",
)
.span_suggestion(
let mut has_help = false;
let mut handler = handler.struct_span_err(
span_with_quotes,
"character literal may only contain one codepoint",
);

if lit.chars().skip(1).all(|c| is_combining_mark(c)) {
let escaped_marks =
lit.chars().skip(1).map(|c| c.escape_default().to_string()).collect::<Vec<_>>();
handler.span_note(
span,
&format!(
"this `{}` is followed by the combining mark{} `{}`",
lit.chars().next().unwrap(),
pluralize!(escaped_marks.len()),
escaped_marks.join(""),
),
);
let normalized = lit.nfc().to_string();
if normalized.chars().count() == 1 {
has_help = true;
handler.span_suggestion(
span,
&format!(
"consider using the normalized form `{}` of this character",
normalized.chars().next().unwrap().escape_default()
),
normalized,
Applicability::MachineApplicable,
);
}
}

if !has_help {
let (prefix, msg) = if mode.is_bytes() {
("b", "if you meant to write a byte string literal, use double quotes")
} else {
("", "if you meant to write a `str` literal, use double quotes")
};

handler.span_suggestion(
span_with_quotes,
msg,
format!("{}\"{}\"", prefix, lit),
Applicability::MachineApplicable,
)
.emit();
);
}

handler.emit();
}
EscapeError::EscapeOnlyChar => {
let (c, char_span) = last_char();
Expand Down
21 changes: 21 additions & 0 deletions src/test/ui/parser/unicode-character-literal.fixed
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Regression test for #88684: Improve diagnostics for combining marks
// in character literals.

// run-rustfix

fn main() {
let _spade = "♠️";
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _s = "ṩ̂̊";
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _a = 'Å';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
//~| HELP: consider using the normalized form `\u{c5}` of this character
}
21 changes: 21 additions & 0 deletions src/test/ui/parser/unicode-character-literal.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
// Regression test for #88684: Improve diagnostics for combining marks
// in character literals.

// run-rustfix

fn main() {
let _spade = '♠️';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `♠` is followed by the combining mark `\u{fe0f}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _s = 'ṩ̂̊';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
//~| HELP: if you meant to write a `str` literal, use double quotes

let _a = 'Å';
//~^ ERROR: character literal may only contain one codepoint
//~| NOTE: this `A` is followed by the combining mark `\u{30a}`
//~| HELP: consider using the normalized form `\u{c5}` of this character
}
48 changes: 48 additions & 0 deletions src/test/ui/parser/unicode-character-literal.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:7:18
|
LL | let _spade = '♠️';
| ^^^
|
note: this `♠` is followed by the combining mark `\u{fe0f}`
--> $DIR/unicode-character-literal.rs:7:19
|
LL | let _spade = '♠️';
| ^
help: if you meant to write a `str` literal, use double quotes
|
LL | let _spade = "♠️";
| ~~~

error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:12:14
|
LL | let _s = 'ṩ̂̊';
| ^^^
|
note: this `s` is followed by the combining marks `\u{323}\u{307}\u{302}\u{30a}`
--> $DIR/unicode-character-literal.rs:12:15
|
LL | let _s = 'ṩ̂̊';
| ^
help: if you meant to write a `str` literal, use double quotes
|
LL | let _s = "ṩ̂̊";
| ~~~

error: character literal may only contain one codepoint
--> $DIR/unicode-character-literal.rs:17:14
|
LL | let _a = 'Å';
| ^-^
| |
| help: consider using the normalized form `\u{c5}` of this character: `Å`
|
note: this `A` is followed by the combining mark `\u{30a}`
--> $DIR/unicode-character-literal.rs:17:15
|
LL | let _a = 'Å';
| ^

error: aborting due to 3 previous errors