Better Rust highlighter support, markdown support

This commit is contained in:
Joshua Barretto 2025-06-15 23:17:15 +01:00
parent a64884d894
commit 3e3755c0b5
6 changed files with 235 additions and 64 deletions

10
Cargo.lock generated
View file

@ -455,6 +455,15 @@ dependencies = [
"syn",
]
[[package]]
name = "unicode-display-width"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a43273b656140aa2bb8e65351fe87c255f0eca706b2538a9bd4a590a3490bf3"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "unicode-ident"
version = "1.0.12"
@ -648,4 +657,5 @@ dependencies = [
"crossterm",
"slotmap",
"thiserror",
"unicode-display-width",
]

View file

@ -9,6 +9,7 @@ slotmap = "1.0"
crossterm = "0.27"
thiserror = "1.0"
chumsky = { version = "0.10.1", features = ["pratt"] }
unicode-display-width = "0.3.0"
[profile.dev]
opt-level = 2

View file

@ -2,11 +2,36 @@ use std::{ops::Range, path::Path};
/// The syntactic category assigned to a highlighted span of text.
///
/// This is a plain fieldless enum, so it is cheap to copy and compare. `Eq` and `Hash` are derived in
/// addition to `PartialEq` so that a token kind can be used as a key in hash-based collections.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum TokenKind {
    /// Non-structural whitespace
    Whitespace,
    /// Identifiers and names
    Ident,
    /// Syntax keywords
    Keyword,
    /// Numeric literals
    Number,
    /// Types or type definitions
    Type,
    /// Comments, which have no effect on the code
    Comment,
    /// Documentation or doc comments
    Doc,
    /// Operators that perform work on operands
    Operator,
    /// Structural tokens (parentheses, braces, brackets, etc.)
    Delimiter,
    /// A field or method of another value (i.e. a named thing not present in the current namespace)
    Property,
    /// A special attribute or decorator attached to some other code
    Attribute,
    /// A macro that transforms the code in some manner
    Macro,
    /// A string literal
    String,
    /// Miscellaneous special syntax (defined per-language)
    Special,
    /// A program constant or other statically-known name
    Constant,
}
pub struct Highlighter {
@ -26,32 +51,88 @@ impl Highlighter {
.map(|p| Regex::parser().parse(p.as_ref()).unwrap())
.collect();
Self {
entries,
/*regex: meta::Regex::new_many(&patterns).unwrap(),*/ matchers,
}
Self { entries, matchers }
}
/// Choose a highlighter from the extension of `file_name`.
///
/// Returns `None` when the path has no extension, when the extension is not valid UTF-8, or when no
/// highlighter is associated with that extension.
pub fn from_file_name(file_name: &Path) -> Option<Self> {
    let extension = file_name.extension().and_then(|ext| ext.to_str())?;
    let highlighter = match extension {
        "rs" => Self::rust(),
        "md" => Self::markdown(),
        _ => return None,
    };
    Some(highlighter)
}
pub fn rust() -> Self {
pub fn markdown() -> Self {
Self::new_from_regex([
(
TokenKind::Keyword,
r"\b[(pub)(enum)(let)(self)(Self)(fn)(impl)(struct)(use)(if)(while)(for)(loop)(mod)]\b",
),
(TokenKind::Ident, r"[a-z_][A-Za-z0-9_]*"),
(TokenKind::Type, r"[A-Z_][A-Za-z0-9_]*"),
(TokenKind::Number, r"[0-9][A-Za-z0-9_]*"),
// Links
(TokenKind::String, r"\[[^\]]*\](\([^\)]*\))?"),
// Header
(TokenKind::Doc, r"^#+[[:space:]][^$]*$"),
// List item
(TokenKind::Operator, r"^[[:space:]]?[\-([0-9]+[\)\.])]"),
// Bold
(TokenKind::Property, r"\*\*[^(\*\*)]*\*\*"),
// Italics
(TokenKind::Attribute, r"\*[^\*]*\*"),
// Code block
(TokenKind::Operator, r"^```[^(^```)]*^```"),
// Inline code
(TokenKind::Constant, r"`[^`$]*[`$]"),
// HTML
(TokenKind::Special, r"<[^<>]*>"),
])
}
fn highlight_str(&self, mut s: &str) -> Vec<(Range<usize>, TokenKind)> {
/// Build the highlighter for Rust source code.
///
/// The highlighter is described as an ordered table of `(TokenKind, pattern)` pairs handed to
/// `Self::new_from_regex`. The patterns are written in this file's own regex dialect rather than a
/// standard one: `[...]` is a set of alternatives, `(...)` is a group, and `x@y` / `~` are the
/// non-standard delimiter operators documented on the regex parser.
///
/// NOTE(review): several entries below overlap (e.g. doc comments vs. line comments, constants vs.
/// types), so the table order presumably matters — confirm against the matching loop in
/// `highlight_str`.
pub fn rust() -> Self {
Self::new_from_regex([
// Doc comments (`///` and `//!`), listed ahead of the general line-comment pattern
(TokenKind::Doc, r"\/\/[\/!][^\n]*$"),
// Line comment
(TokenKind::Comment, r"\/\/[^$]*$"),
// Multi-line comment
(TokenKind::Comment, r"\/\*[^(\*\/)]*\*\/"),
// NOTE(review): `(use)` appears twice in this keyword set; the second occurrence looks redundant.
(
TokenKind::Keyword,
r"\b[(pub)(enum)(let)(self)(Self)(fn)(impl)(struct)(use)(if)(while)(for)(in)(loop)(mod)(match)(else)(break)(continue)(trait)(const)(static)(type)(mut)(as)(crate)(extern)(move)(ref)(return)(super)(unsafe)(use)(where)(async)(dyn)(try)(gen)(macro_rules)(union)(raw)]\b",
),
// Boolean literals
(TokenKind::Constant, r"\b[(true)(false)]\b"),
// Flow-control operators count as keywords
(TokenKind::Keyword, r"\b[(\.await)\?]\b"),
// Macro invocations: println!
(TokenKind::Macro, r"\b[A-Za-z_][A-Za-z0-9_]*!"),
// Meta-variables
(TokenKind::Macro, r"\$[A-Za-z_][A-Za-z0-9_]*\b"),
// Upper-case names of two or more characters (e.g. `MAX_LEN`), listed ahead of the type pattern
(TokenKind::Constant, r"\b[A-Z][A-Z0-9_]+\b"),
// Names starting with an upper-case letter
(TokenKind::Type, r"\b[A-Z][A-Za-z0-9_]*\b"),
// Primitives
(
TokenKind::Type,
r"\b[(u8)(u16)(u32)(u64)(u128)(i8)(i16)(i32)(i64)(i128)(usize)(isize)(bool)(str)(char)]\b",
),
// "foo" or b"foo" or r#"foo"#
(TokenKind::String, r#"b?r?(#*)@("[(\\")[^("~)]]*("~))"#),
// Characters
// NOTE(review): the `\x` escape accepts `[0-7A-Za-z]` for both digits, which is not the hex digit
// set `0-9A-Fa-f` — confirm whether this looseness is intended.
(
TokenKind::String,
r#"b?'[(\\[nrt\\0(x[0-7A-Za-z][0-7A-Za-z])])[^']]'"#,
),
// Operators and punctuation, including compound-assignment forms
(
TokenKind::Operator,
r"[(&(mut)?)(\+=?)(\-=?)(\*=?)(\/=?)(%=?)(!=?)(==?)(&&?=?)(\|\|?=?)(<<?=?)(>>?=?)(\.\.[\.=]?)\\\~\^:;,\@(=>?)]",
),
// Fields and methods: a.foo
(TokenKind::Property, r"\.[a-z_][A-Za-z0-9_]*"),
// Paths: std::foo::bar
(TokenKind::Property, r"[A-Za-z_][A-Za-z0-9_]*::"),
// Lifetimes
(TokenKind::Special, r"'[a-z_][A-Za-z0-9_]*\b"),
// Remaining lower-case identifiers
(TokenKind::Ident, r"\b[a-z_][A-Za-z0-9_]*\b"),
// Numeric literals (a leading digit followed by alphanumerics, `_` or `.`)
(TokenKind::Number, r"[0-9][A-Za-z0-9_\.]*"),
// Brackets, braces and parentheses
(TokenKind::Delimiter, r"[\{\}\(\)\[\]]"),
// NOTE(review): this pattern is identical to the `Delimiter` entry directly above; if earlier
// entries take priority this entry can never match — confirm and remove or correct.
(TokenKind::Macro, r"[\{\}\(\)\[\]]"),
// Attributes: #[...] and #![...]
(TokenKind::Attribute, r"#!?\[[^\]]*\]"),
])
}
fn highlight_str(&self, mut s: &[char]) -> Vec<(Range<usize>, TokenKind)> {
let mut tokens = Vec::new();
let mut i = 0;
loop {
@ -63,8 +144,8 @@ impl Highlighter {
{
tokens.push((i..i + n, self.entries[idx]));
n
} else if let Some((n, _)) = s.char_indices().nth(1) {
n
} else if !s.is_empty() {
1
} else {
break;
};
@ -74,7 +155,7 @@ impl Highlighter {
tokens
}
pub fn highlight(self, s: &str) -> Highlights {
pub fn highlight(self, s: &[char]) -> Highlights {
let tokens = self.highlight_str(s);
Highlights {
highlighter: self,
@ -107,32 +188,40 @@ impl Highlights {
pub enum Regex {
Whitespace,
WordBoundary,
LineStart,
LineEnd,
LastDelim,
Range(char, char),
Char(char),
Set(Vec<Self>),
NegSet(Vec<Self>),
Group(Vec<Self>),
// (at_least, _)
Many(usize, Box<Self>),
// (at_least, at_most, _)
Many(usize, usize, Box<Self>),
// (delimiter, x) - delimit x with `delimiter` on either side (used for raw strings)
Delim(Box<Self>, Box<Self>),
}
struct State<'a> {
s: &'a str,
s: &'a [char],
pos: usize,
delim: Option<&'a [char]>,
}
impl State<'_> {
fn peek(&self) -> Option<char> {
self.s[self.pos..].chars().next()
self.s.get(self.pos).copied()
}
fn prev(&self) -> Option<char> {
self.s[..self.pos].chars().rev().next()
self.s[..self.pos].last().copied()
// self.s.get(self.pos.saturating_sub(1)).copied()
}
fn skip(&mut self) {
if let Some(c) = self.peek() {
self.pos += c.len_utf8();
}
fn skip_if(&mut self, f: impl FnOnce(char) -> bool) -> Option<()> {
self.peek().filter(|c| f(*c))?;
self.pos += 1;
Some(())
}
fn attempt(&mut self, r: &Regex) -> Option<()> {
@ -152,26 +241,32 @@ impl State<'_> {
(is_word(self.prev().unwrap_or(' ')) != is_word(self.peek().unwrap_or(' ')))
.then_some(())
}
Regex::Char(c) => {
if self.peek()? == *c {
self.skip();
Regex::LineStart => self.prev().map_or(true, |c| c == '\n').then_some(()),
Regex::LineEnd => self.peek().map_or(true, |c| c == '\n').then_some(()),
Regex::LastDelim => {
if self.s[self.pos..].starts_with(self.delim?) {
self.pos += self.delim.unwrap().len();
Some(())
} else {
None
}
}
Regex::Char(x) => self.skip_if(|c| c == *x),
Regex::Whitespace => {
let mut once = false;
while let Some(c) = self.peek() {
if c.is_ascii_whitespace() {
self.skip();
once = true;
} else {
break;
}
while self.skip_if(|c| c.is_ascii_whitespace()).is_some() {
once = true;
}
once.then_some(())
}
Regex::NegSet(xs) => {
if xs.iter().all(|x| self.attempt(x).is_none()) {
self.skip_if(|_| true)?;
Some(())
} else {
None
}
}
Regex::Set(xs) => xs.iter().find_map(|x| self.attempt(x)),
Regex::Group(xs) => {
for x in xs {
@ -179,72 +274,101 @@ impl State<'_> {
}
Some(())
}
Regex::Range(a, b) => {
if (a..=b).contains(&&self.peek()?) {
self.skip();
Some(())
} else {
None
}
}
Regex::Many(at_least, x) => {
Regex::Range(a, b) => self.skip_if(|c| (a..=b).contains(&&c)),
Regex::Many(at_least, at_most, x) => {
let mut times = 0;
loop {
if self.attempt(x).is_none() {
let pos = self.pos;
if times >= *at_most {
break;
} else if self.attempt(x).is_none() {
break;
}
assert_ne!(pos, self.pos, "{x:?}");
times += 1;
}
if times >= *at_least { Some(()) } else { None }
}
r => todo!("{r:?}"),
Regex::Delim(d, r) => {
let old_pos = self.pos;
self.go(d)?;
let old_delim = self.delim.replace(&self.s[old_pos..self.pos]);
let res = self.go(r);
self.delim = old_delim;
res
}
}
}
}
impl Regex {
fn matches(&self, s: &str) -> Option<usize> {
let mut s = State { s, pos: 0 };
fn matches(&self, s: &[char]) -> Option<usize> {
let mut s = State {
s,
pos: 0,
delim: None,
};
s.go(self).map(|_| s.pos)
}
}
use chumsky::{pratt::postfix, prelude::*};
use chumsky::{
pratt::{infix, left, postfix},
prelude::*,
};
impl Regex {
fn parser<'a>() -> impl Parser<'a, &'a str, Self, extra::Err<Rich<'a, char>>> {
recursive(|regex| {
let char_ = any().filter(|c: &char| c.is_alphanumeric() || *c == '_');
let metachars = r"{}[]()^$.|*+-?\/@~";
let char_ = choice((
none_of(metachars),
// Escaped meta characters
just('\\').ignore_then(one_of(metachars)),
just("\\n").to('\n'),
));
let range = char_
.then_ignore(just('-'))
.then(char_)
.map(|(a, b)| Self::Range(a, b));
let items = regex.clone().repeated().collect();
let atom = choice((
range,
char_.map(Self::Char),
just("\\b").to(Self::WordBoundary),
just("^").to(Self::LineStart),
just("$").to(Self::LineEnd),
just("~").to(Self::LastDelim),
// Classes
just("[[:space:]]").map(|_| Self::Whitespace),
regex
items
.clone()
.delimited_by(just("[^"), just(']'))
.map(Regex::NegSet),
items
.clone()
.repeated()
.collect()
.delimited_by(just('['), just(']'))
.map(Regex::Set),
regex
items
.clone()
.repeated()
.collect()
.delimited_by(just('('), just(')'))
.map(Regex::Group),
));
atom.pratt((
postfix(0, just('*'), |r, _, _| Self::Many(0, Box::new(r))),
postfix(0, just('+'), |r, _, _| Self::Many(1, Box::new(r))),
postfix(1, just('*'), |r, _, _| Self::Many(0, !0, Box::new(r))),
postfix(1, just('+'), |r, _, _| Self::Many(1, !0, Box::new(r))),
postfix(1, just('?'), |r, _, _| Self::Many(0, 1, Box::new(r))),
// Non-standard: `x@y` parses `x` and then `y`. `y` can use `~` to refer to the extra string that was
// parsed by `x`. This supports nesting and is intended for context-sensitive patterns like Rust raw
// strings.
infix(left(0), just('@'), |d, _, r, _| {
Self::Delim(Box::new(d), Box::new(r))
}),
))
})
.repeated()

View file

@ -49,6 +49,11 @@ impl ToString for Text {
}
impl Text {
/// Borrow the characters of this text as a slice (a reference to the `chars` field).
// TODO: Remove this
pub fn chars(&self) -> &[char] {
&self.chars
}
pub fn to_coord(&self, pos: usize) -> [isize; 2] {
let mut n = 0;
let mut last_n = 0;
@ -138,8 +143,8 @@ impl Buffer {
Err(err) => return Err(err.into()),
};
Ok(Self {
highlights: Highlighter::from_file_name(&path).map(|h| h.highlight(&chars)),
text: Text { chars },
highlights: Highlighter::from_file_name(&path).map(|h| h.highlight(&s)),
cursors: HopSlotMap::default(),
dir,
path: Some(path),
@ -159,7 +164,7 @@ impl Buffer {
self.highlights = self
.highlights
.take()
.map(|hl| hl.highlighter.highlight(&self.text.to_string()));
.map(|hl| hl.highlighter.highlight(self.text.chars()));
}
pub fn clear(&mut self) {

View file

@ -354,7 +354,8 @@ impl<'a> Terminal<'a> {
stdout.queue(style::Print(c)).unwrap();
// Move cursor
cursor_pos[0] += 1;
cursor_pos[0] +=
unicode_display_width::width(c.encode_utf8(&mut [0; 4])) as u16;
}
}
}

View file

@ -51,6 +51,16 @@ pub struct Theme {
pub hl_token_keyword: Color,
pub hl_token_number: Color,
pub hl_token_type: Color,
pub hl_token_comment: Color,
pub hl_token_operator: Color,
pub hl_token_delimiter: Color,
pub hl_token_doc: Color,
pub hl_token_attribute: Color,
pub hl_token_property: Color,
pub hl_token_macro: Color,
pub hl_token_string: Color,
pub hl_token_special: Color,
pub hl_token_constant: Color,
}
impl Default for Theme {
@ -73,10 +83,20 @@ impl Default for Theme {
option_new: Color::AnsiValue(148),
hl_token_whitespace: Color::Reset,
hl_token_ident: Color::AnsiValue(187),
hl_token_keyword: Color::AnsiValue(46),
hl_token_ident: Color::AnsiValue(7),
hl_token_keyword: Color::AnsiValue(112),
hl_token_number: Color::AnsiValue(45),
hl_token_type: Color::AnsiValue(203),
hl_token_type: Color::AnsiValue(210),
hl_token_comment: Color::AnsiValue(145),
hl_token_operator: Color::AnsiValue(111),
hl_token_delimiter: Color::AnsiValue(37),
hl_token_doc: Color::AnsiValue(180),
hl_token_attribute: Color::AnsiValue(146),
hl_token_property: Color::AnsiValue(152),
hl_token_macro: Color::AnsiValue(117),
hl_token_string: Color::AnsiValue(179),
hl_token_special: Color::AnsiValue(160),
hl_token_constant: Color::AnsiValue(81),
}
}
}
@ -89,6 +109,16 @@ impl Theme {
TokenKind::Keyword => self.hl_token_keyword,
TokenKind::Number => self.hl_token_number,
TokenKind::Type => self.hl_token_type,
TokenKind::Comment => self.hl_token_comment,
TokenKind::Operator => self.hl_token_operator,
TokenKind::Delimiter => self.hl_token_delimiter,
TokenKind::Doc => self.hl_token_doc,
TokenKind::Attribute => self.hl_token_attribute,
TokenKind::Property => self.hl_token_property,
TokenKind::Macro => self.hl_token_macro,
TokenKind::String => self.hl_token_string,
TokenKind::Special => self.hl_token_special,
TokenKind::Constant => self.hl_token_constant,
}
}
}