alacritty

Unnamed repository; edit this file 'description' to name the repository.
Log | Files | Refs | README

commit d8272662db4a4dc1ef58b8379dc88162066a3241
parent d29c30900743f3e7f864f7951edf34c7423accd8
Author: Christian Duerr <chrisduerr@users.noreply.github.com>
Date:   Sat, 23 Mar 2019 11:56:46 +0000

Fix URL parsing with double-width characters

Since double-width characters are followed by an empty cell containing
only the `WIDE_CHAR_SPACER` flag, the URL parser would stop upon
encountering the cell after a double-width character.

By skipping cells that contain the `WIDE_CHAR_SPACER` flag and
incrementing the URL length by the Unicode width of the character instead
of the cell count, this can be resolved for both URL launching and URL
highlighting.

Fixes #2158.

Diffstat:
M CHANGELOG.md | 1 +
M src/input.rs | 6 +++---
M src/term/mod.rs | 4 ++--
M src/url.rs | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
4 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md @@ -47,6 +47,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fixes increase/decrease font-size keybindings on international keyboards - On Wayland, the `--title` flag will set the Window title now - Parsing issues with URLs starting in the first or ending in the last column +- URLs stopping at double-width characters ## Version 0.2.9 diff --git a/src/input.rs b/src/input.rs @@ -447,7 +447,7 @@ impl<'a, A: ActionContext + 'a> Processor<'a, A> { None }; - if let Some(Url { text, origin }) = url { + if let Some(Url { origin, len, .. }) = url { let mouse_cursor = if self.ctx.terminal().mode().intersects(mouse_mode) { MouseCursor::Default } else { @@ -473,9 +473,9 @@ impl<'a, A: ActionContext + 'a> Processor<'a, A> { } // Underline all cells and store their current underline state - let mut underlined = Vec::with_capacity(text.len()); + let mut underlined = Vec::with_capacity(len); let iter = once(start).chain(start.iter(Column(cols - 1), last_line)); - for point in iter.take(text.len()) { + for point in iter.take(len) { let cell = &mut self.ctx.terminal_mut().grid_mut()[point.line][point.col]; underlined.push(cell.flags.contains(Flags::UNDERLINE)); cell.flags.insert(Flags::UNDERLINE); diff --git a/src/term/mod.rs b/src/term/mod.rs @@ -120,14 +120,14 @@ impl Search for Term { let mut url_parser = UrlParser::new(); while let Some(cell) = iterb.prev() { if (iterb.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE)) - || url_parser.advance_left(cell.c) + || url_parser.advance_left(cell) { break; } } while let Some(cell) = iterf.next() { - if url_parser.advance_right(cell.c) + if url_parser.advance_right(cell) || (iterf.cur().col == last_col && !cell.flags.contains(cell::Flags::WRAPLINE)) { break; diff --git a/src/url.rs b/src/url.rs @@ -14,6 +14,8 @@ use url; +use crate::term::cell::{Cell, Flags}; + // See https://tools.ietf.org/html/rfc3987#page-13 const URL_SEPARATOR_CHARS: 
[char; 10] = ['<', '>', '"', ' ', '{', '}', '|', '\\', '^', '`']; const URL_DENY_END_CHARS: [char; 8] = ['.', ',', ';', ':', '?', '!', '/', '(']; @@ -26,12 +28,14 @@ const URL_SCHEMES: [&str; 8] = [ pub struct Url { pub text: String, pub origin: usize, + pub len: usize, } /// Parser for streaming inside-out detection of URLs. pub struct UrlParser { state: String, origin: usize, + len: usize, } impl UrlParser { @@ -39,22 +43,40 @@ impl UrlParser { UrlParser { state: String::new(), origin: 0, + len: 0, } } /// Advance the parser one character to the left. - pub fn advance_left(&mut self, c: char) -> bool { - if self.advance(c, 0) { + pub fn advance_left(&mut self, cell: &Cell) -> bool { + if cell.flags.contains(Flags::WIDE_CHAR_SPACER) { + self.origin += 1; + self.len += 1; + return false; + } + + if self.advance(cell.c, 0) { true } else { self.origin += 1; + self.len += 1; false } } /// Advance the parser one character to the right. - pub fn advance_right(&mut self, c: char) -> bool { - self.advance(c, self.state.len()) + pub fn advance_right(&mut self, cell: &Cell) -> bool { + if cell.flags.contains(Flags::WIDE_CHAR_SPACER) { + self.len += 1; + return false; + } + + if self.advance(cell.c, self.state.len()) { + true + } else { + self.len += 1; + false + } } /// Returns the URL if the parser has found any. 
@@ -116,8 +138,9 @@ impl UrlParser { Ok(url) => { if URL_SCHEMES.contains(&url.scheme()) && self.origin > 0 { Some(Url { - text: self.state, origin: self.origin - 1, + text: self.state, + len: self.len, }) } else { None @@ -144,10 +167,12 @@ impl UrlParser { mod tests { use std::mem; + use unicode_width::UnicodeWidthChar; + use crate::grid::Grid; use crate::index::{Column, Line, Point}; use crate::term::{Search, SizeInfo, Term}; - use crate::term::cell::Cell; + use crate::term::cell::{Cell, Flags}; use crate::message_bar::MessageBuffer; fn url_create_term(input: &str) -> Term { @@ -161,11 +186,22 @@ mod tests { dpr: 1.0, }; + let width = input.chars().map(|c| if c.width() == Some(2) { 2 } else { 1 }).sum(); let mut term = Term::new(&Default::default(), size, MessageBuffer::new()); - let mut grid: Grid<Cell> = Grid::new(Line(1), Column(input.len()), 0, Cell::default()); + let mut grid: Grid<Cell> = Grid::new(Line(1), Column(width), 0, Cell::default()); - for (i, c) in input.chars().enumerate() { + let mut i = 0; + for c in input.chars() { grid[Line(0)][Column(i)].c = c; + + if c.width() == Some(2) { + grid[Line(0)][Column(i)].flags.insert(Flags::WIDE_CHAR); + grid[Line(0)][Column(i + 1)].flags.insert(Flags::WIDE_CHAR_SPACER); + grid[Line(0)][Column(i + 1)].c = ' '; + i += 1; + } + + i += 1; } mem::swap(term.grid_mut(), &mut grid); @@ -199,6 +235,37 @@ mod tests { let term = url_create_term("https://example.org"); let url = term.url_search(Point::new(0, Column(0))); assert_eq!(url.map(|u| u.origin), Some(0)); + + let term = url_create_term("https://全.org"); + let url = term.url_search(Point::new(0, Column(10))); + assert_eq!(url.map(|u| u.origin), Some(10)); + + let term = url_create_term("https://全.org"); + let url = term.url_search(Point::new(0, Column(8))); + assert_eq!(url.map(|u| u.origin), Some(8)); + + let term = url_create_term("https://全.org"); + let url = term.url_search(Point::new(0, Column(9))); + assert_eq!(url.map(|u| u.origin), Some(9)); + } + + 
#[test] + fn url_len() { + let term = url_create_term(" test https://example.org "); + let url = term.url_search(Point::new(0, Column(10))); + assert_eq!(url.map(|u| u.len), Some(19)); + + let term = url_create_term("https://全.org"); + let url = term.url_search(Point::new(0, Column(0))); + assert_eq!(url.map(|u| u.len), Some(14)); + + let term = url_create_term("https://全.org"); + let url = term.url_search(Point::new(0, Column(10))); + assert_eq!(url.map(|u| u.len), Some(14)); + + let term = url_create_term("https://全.org"); + let url = term.url_search(Point::new(0, Column(9))); + assert_eq!(url.map(|u| u.len), Some(14)); } #[test]