Fixed buggy boundary highlighting

This commit is contained in:
Joshua Barretto 2025-09-24 14:32:34 +01:00
parent 9c002841ff
commit ed2eece91b
2 changed files with 35 additions and 27 deletions

View file

@ -78,24 +78,22 @@ impl Highlighter {
let mut tokens = Vec::new(); let mut tokens = Vec::new();
let mut i = 0; let mut i = 0;
loop { loop {
let n = if let Some((idx, n)) = self i = if let Some((idx, n)) = self
.matchers .matchers
.iter() .iter()
.enumerate() .enumerate()
.find_map(|(i, r)| Some((i, r.matches(s)?))) .find_map(|(idx, r)| Some((idx, r.matches(s, i)?)))
{ {
tokens.push(Token { tokens.push(Token {
kind: self.entries[idx], kind: self.entries[idx],
range: i..i + n, range: i..n,
}); });
n n
} else if !s.is_empty() { } else if i < s.len() {
1 i + 1
} else { } else {
break; break;
}; };
i += n;
s = &s[n..];
} }
tokens tokens
} }
@ -259,10 +257,10 @@ impl State<'_> {
} }
impl Regex { impl Regex {
fn matches(&self, s: &[char]) -> Option<usize> { fn matches(&self, s: &[char], at: usize) -> Option<usize> {
let mut s = State { let mut s = State {
s, s,
pos: 0, pos: at,
delim: None, delim: None,
}; };
s.go(self).map(|_| s.pos) s.go(self).map(|_| s.pos)
@ -274,6 +272,14 @@ use chumsky::{
prelude::*, prelude::*,
}; };
/// Regression test: a number pattern wrapped in `\b` word boundaries must
/// match an input consisting of a single digit.
#[test]
fn regex() {
    let reg = Regex::parser()
        .parse(r"\b[0-9][A-Za-z0-9_\.]*\b")
        .unwrap();
    let input: Vec<char> = "5".chars().collect();
    // `matches` takes the starting offset as its second argument; start at
    // the first character of the input.
    assert!(reg.matches(&input, 0).is_some());
}
impl Regex { impl Regex {
fn parser<'a>() -> impl Parser<'a, &'a str, Self, extra::Err<Rich<'a, char>>> { fn parser<'a>() -> impl Parser<'a, &'a str, Self, extra::Err<Rich<'a, char>>> {
recursive(|regex| { recursive(|regex| {
@ -293,14 +299,14 @@ impl Regex {
let items = regex.clone().repeated().collect(); let items = regex.clone().repeated().collect();
let atom = choice(( let atom = choice((
range,
char_.map(Self::Char),
just("\\b").to(Self::WordBoundary), just("\\b").to(Self::WordBoundary),
just("^").to(Self::LineStart), just("^").to(Self::LineStart),
just("$").to(Self::LineEnd), just("$").to(Self::LineEnd),
just("~").to(Self::LastDelim), just("~").to(Self::LastDelim),
// Classes // Classes
just("[[:space:]]").map(|_| Self::Whitespace), just("[[:space:]]").map(|_| Self::Whitespace),
range,
char_.map(Self::Char),
items items
.clone() .clone()
.delimited_by(just("[^"), just(']')) .delimited_by(just("[^"), just(']'))
@ -333,14 +339,3 @@ impl Regex {
.map(Self::Group) .map(Self::Group)
} }
} }
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn simple() {
let hl = Highlighter::rust().highlight("pub");
assert_eq!(hl.tokens, Vec::new());
}
}

View file

@ -14,7 +14,7 @@ impl LangPack {
file_name.file_name().and_then(|e| e.to_str()).unwrap_or(""), file_name.file_name().and_then(|e| e.to_str()).unwrap_or(""),
file_name.extension().and_then(|e| e.to_str()).unwrap_or(""), file_name.extension().and_then(|e| e.to_str()).unwrap_or(""),
) { ) {
(_, "rs") => Self { (_, "rs" | "ron") => Self {
highlighter: Highlighter::default().rust().git(), highlighter: Highlighter::default().rust().git(),
comment_syntax: Some(vec!['/', '/', ' ']), comment_syntax: Some(vec!['/', '/', ' ']),
}, },
@ -26,7 +26,7 @@ impl LangPack {
highlighter: Highlighter::default().toml().git(), highlighter: Highlighter::default().toml().git(),
comment_syntax: Some(vec!['#', ' ']), comment_syntax: Some(vec!['#', ' ']),
}, },
(_, "c" | "h" | "cpp" | "hpp" | "cxx" | "js" | "ts" | "go") => Self { (_, "c" | "h" | "cpp" | "hpp" | "cxx" | "js" | "ts" | "go" | "sh") => Self {
highlighter: Highlighter::default().generic_clike().git(), highlighter: Highlighter::default().generic_clike().git(),
comment_syntax: Some(vec!['/', '/', ' ']), comment_syntax: Some(vec!['/', '/', ' ']),
}, },
@ -46,6 +46,13 @@ impl LangPack {
highlighter: Highlighter::default().makefile().git(), highlighter: Highlighter::default().makefile().git(),
comment_syntax: Some(vec!['#', ' ']), comment_syntax: Some(vec!['#', ' ']),
}, },
(_, "proto" | "json") => Self {
highlighter: Highlighter::default()
.clike_comments()
.generic_delimited()
.git(),
comment_syntax: Some(vec!['/', '/', ' ']),
},
_ => Self { _ => Self {
highlighter: Highlighter::default().git(), highlighter: Highlighter::default().git(),
comment_syntax: None, comment_syntax: None,
@ -120,7 +127,7 @@ impl Highlighter {
// Lifetimes // Lifetimes
.with(TokenKind::Special, r"'[a-z_][A-Za-z0-9_]*\b") .with(TokenKind::Special, r"'[a-z_][A-Za-z0-9_]*\b")
.with(TokenKind::Ident, r"\b[a-z_][A-Za-z0-9_]*\b") .with(TokenKind::Ident, r"\b[a-z_][A-Za-z0-9_]*\b")
.with(TokenKind::Number, r"[0-9][A-Za-z0-9_\.]*") .with(TokenKind::Number, r"\b[0-9][A-Za-z0-9_\.]*\b")
.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]") .with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]")
.with(TokenKind::Macro, r"[\{\}\(\)\[\]]") .with(TokenKind::Macro, r"[\{\}\(\)\[\]]")
.with(TokenKind::Attribute, r"#!?\[[^\]]*\]") .with(TokenKind::Attribute, r"#!?\[[^\]]*\]")
@ -143,6 +150,12 @@ impl Highlighter {
self.with(TokenKind::Macro, r"^#[^$]*$") self.with(TokenKind::Macro, r"^#[^$]*$")
} }
/// Adds three rules through `with`, in this order: double-quoted strings,
/// bracket/brace/paren delimiters, and word-bounded numbers.
// NOTE(review): rule order presumably sets match priority — confirm against `with`.
pub fn generic_delimited(self) -> Self {
    let with_strings = self.with(TokenKind::String, r#""[(\\")[^"]]*""#);
    let with_delimiters = with_strings.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]");
    with_delimiters.with(TokenKind::Number, r"\b[0-9][A-Za-z0-9_\.]*\b")
}
pub fn clike(self) -> Self { pub fn clike(self) -> Self {
self self
.with(TokenKind::Constant, r"\b[(true)(false)]\b") .with(TokenKind::Constant, r"\b[(true)(false)]\b")
@ -163,7 +176,7 @@ impl Highlighter {
// Paths: std::foo::bar // Paths: std::foo::bar
.with(TokenKind::Property, r"[A-Za-z_][A-Za-z0-9_]*::") .with(TokenKind::Property, r"[A-Za-z_][A-Za-z0-9_]*::")
.with(TokenKind::Ident, r"\b[a-z_][A-Za-z0-9_]*\b") .with(TokenKind::Ident, r"\b[a-z_][A-Za-z0-9_]*\b")
.with(TokenKind::Number, r"[0-9][A-Za-z0-9_\.]*") .with(TokenKind::Number, r"\b[0-9][A-Za-z0-9_\.]*\b")
.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]") .with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]")
} }
@ -226,7 +239,7 @@ impl Highlighter {
pub fn toml(self) -> Self { pub fn toml(self) -> Self {
self self
// Header // Header
.with(TokenKind::Doc, r#"^\[[^\n\]]*\]$"#) .with(TokenKind::Doc, r#"^\[[^\n\]]*\]"#)
// Delimiters // Delimiters
.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]") .with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]")
// Operators // Operators