Fixed buggy boundary highlighting

This commit is contained in:
Joshua Barretto 2025-09-24 14:32:34 +01:00
parent 9c002841ff
commit ed2eece91b
2 changed files with 35 additions and 27 deletions

View file

@ -78,24 +78,22 @@ impl Highlighter {
let mut tokens = Vec::new();
let mut i = 0;
loop {
let n = if let Some((idx, n)) = self
i = if let Some((idx, n)) = self
.matchers
.iter()
.enumerate()
.find_map(|(i, r)| Some((i, r.matches(s)?)))
.find_map(|(idx, r)| Some((idx, r.matches(s, i)?)))
{
tokens.push(Token {
kind: self.entries[idx],
range: i..i + n,
range: i..n,
});
n
} else if !s.is_empty() {
1
} else if i < s.len() {
i + 1
} else {
break;
};
i += n;
s = &s[n..];
}
tokens
}
@ -259,10 +257,10 @@ impl State<'_> {
}
impl Regex {
fn matches(&self, s: &[char]) -> Option<usize> {
fn matches(&self, s: &[char], at: usize) -> Option<usize> {
let mut s = State {
s,
pos: 0,
pos: at,
delim: None,
};
s.go(self).map(|_| s.pos)
@ -274,6 +272,14 @@ use chumsky::{
prelude::*,
};
/// Smoke test for word-boundary handling: the number pattern wrapped in
/// `\b` anchors must match a lone digit when matching starts at offset 0.
#[test]
fn regex() {
    let reg = Regex::parser().parse(r"\b[0-9][A-Za-z0-9_\.]*\b").unwrap();
    let input: Vec<char> = "5".chars().collect();
    // `matches` takes the character offset to start from as its second argument.
    assert!(reg.matches(&input, 0).is_some());
}
impl Regex {
fn parser<'a>() -> impl Parser<'a, &'a str, Self, extra::Err<Rich<'a, char>>> {
recursive(|regex| {
@ -293,14 +299,14 @@ impl Regex {
let items = regex.clone().repeated().collect();
let atom = choice((
range,
char_.map(Self::Char),
just("\\b").to(Self::WordBoundary),
just("^").to(Self::LineStart),
just("$").to(Self::LineEnd),
just("~").to(Self::LastDelim),
// Classes
just("[[:space:]]").map(|_| Self::Whitespace),
range,
char_.map(Self::Char),
items
.clone()
.delimited_by(just("[^"), just(']'))
@ -333,14 +339,3 @@ impl Regex {
.map(Self::Group)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Highlights the input `"pub"` and compares the resulting token list
    /// against an empty vector.
    #[test]
    fn simple() {
        let highlighted = Highlighter::rust().highlight("pub");
        assert_eq!(highlighted.tokens, Vec::new());
    }
}

View file

@ -14,7 +14,7 @@ impl LangPack {
file_name.file_name().and_then(|e| e.to_str()).unwrap_or(""),
file_name.extension().and_then(|e| e.to_str()).unwrap_or(""),
) {
(_, "rs") => Self {
(_, "rs" | "ron") => Self {
highlighter: Highlighter::default().rust().git(),
comment_syntax: Some(vec!['/', '/', ' ']),
},
@ -26,7 +26,7 @@ impl LangPack {
highlighter: Highlighter::default().toml().git(),
comment_syntax: Some(vec!['#', ' ']),
},
(_, "c" | "h" | "cpp" | "hpp" | "cxx" | "js" | "ts" | "go") => Self {
(_, "c" | "h" | "cpp" | "hpp" | "cxx" | "js" | "ts" | "go" | "sh") => Self {
highlighter: Highlighter::default().generic_clike().git(),
comment_syntax: Some(vec!['/', '/', ' ']),
},
@ -46,6 +46,13 @@ impl LangPack {
highlighter: Highlighter::default().makefile().git(),
comment_syntax: Some(vec!['#', ' ']),
},
(_, "proto" | "json") => Self {
highlighter: Highlighter::default()
.clike_comments()
.generic_delimited()
.git(),
comment_syntax: Some(vec!['/', '/', ' ']),
},
_ => Self {
highlighter: Highlighter::default().git(),
comment_syntax: None,
@ -120,7 +127,7 @@ impl Highlighter {
// Lifetimes
.with(TokenKind::Special, r"'[a-z_][A-Za-z0-9_]*\b")
.with(TokenKind::Ident, r"\b[a-z_][A-Za-z0-9_]*\b")
.with(TokenKind::Number, r"[0-9][A-Za-z0-9_\.]*")
.with(TokenKind::Number, r"\b[0-9][A-Za-z0-9_\.]*\b")
.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]")
.with(TokenKind::Macro, r"[\{\}\(\)\[\]]")
.with(TokenKind::Attribute, r"#!?\[[^\]]*\]")
@ -143,6 +150,12 @@ impl Highlighter {
self.with(TokenKind::Macro, r"^#[^$]*$")
}
/// Registers a generic set of matchers on this highlighter and returns it.
///
/// Three patterns are added, in this order:
/// 1. `TokenKind::String` — a pattern that starts and ends with `"`.
/// 2. `TokenKind::Delimiter` — one of the characters `{ } ( ) [ ]`.
/// 3. `TokenKind::Number` — a digit followed by letters, digits, `_` or `.`,
///    enclosed in `\b` word-boundary anchors.
pub fn generic_delimited(self) -> Self {
    // Each step consumes the previous builder value, preserving registration order.
    let with_strings = self.with(TokenKind::String, r#""[(\\")[^"]]*""#);
    let with_delimiters = with_strings.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]");
    with_delimiters.with(TokenKind::Number, r"\b[0-9][A-Za-z0-9_\.]*\b")
}
pub fn clike(self) -> Self {
self
.with(TokenKind::Constant, r"\b[(true)(false)]\b")
@ -163,7 +176,7 @@ impl Highlighter {
// Paths: std::foo::bar
.with(TokenKind::Property, r"[A-Za-z_][A-Za-z0-9_]*::")
.with(TokenKind::Ident, r"\b[a-z_][A-Za-z0-9_]*\b")
.with(TokenKind::Number, r"[0-9][A-Za-z0-9_\.]*")
.with(TokenKind::Number, r"\b[0-9][A-Za-z0-9_\.]*\b")
.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]")
}
@ -226,7 +239,7 @@ impl Highlighter {
pub fn toml(self) -> Self {
self
// Header
.with(TokenKind::Doc, r#"^\[[^\n\]]*\]$"#)
.with(TokenKind::Doc, r#"^\[[^\n\]]*\]"#)
// Delimiters
.with(TokenKind::Delimiter, r"[\{\}\(\)\[\]]")
// Operators