Added primitive syntax highlighting

This commit is contained in:
Joshua Barretto 2025-06-14 23:43:32 +01:00
parent 5901a6cd1f
commit a64884d894
7 changed files with 469 additions and 12 deletions

143
Cargo.lock generated
View file

@ -2,6 +2,21 @@
# It is not intended for manual editing.
version = 4
[[package]]
name = "aho-corasick"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916"
dependencies = [
"memchr",
]
[[package]]
name = "allocator-api2"
version = "0.2.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923"
[[package]]
name = "anstream"
version = "0.6.11"
@ -68,12 +83,35 @@ version = "2.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed570934406eb16438a4e976b1b4500774099c13b8cb96eec99f620f05090ddf"
[[package]]
name = "cc"
version = "1.2.27"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc"
dependencies = [
"shlex",
]
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chumsky"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "14377e276b2c8300513dff55ba4cc4142b44e5d6de6d00eb5b2307d650bb4ec1"
dependencies = [
"hashbrown",
"regex-automata",
"serde",
"stacker",
"unicode-ident",
"unicode-segmentation",
]
[[package]]
name = "clap"
version = "4.4.18"
@ -145,6 +183,29 @@ dependencies = [
"winapi",
]
[[package]]
name = "equivalent"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f"
[[package]]
name = "foldhash"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
[[package]]
name = "hashbrown"
version = "0.15.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5"
dependencies = [
"allocator-api2",
"equivalent",
"foldhash",
]
[[package]]
name = "heck"
version = "0.4.1"
@ -153,9 +214,9 @@ checksum = "95505c38b4572b2d910cecb0281560f54b440a19336cbbcb27bf6ce6adc6f5a8"
[[package]]
name = "libc"
version = "0.2.153"
version = "0.2.173"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
checksum = "d8cfeafaffdbc32176b64fb251369d52ea9f0a8fbc6f8759edffef7b525d64bb"
[[package]]
name = "lock_api"
@ -173,6 +234,12 @@ version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "memchr"
version = "2.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0"
[[package]]
name = "mio"
version = "0.8.10"
@ -217,6 +284,15 @@ dependencies = [
"unicode-ident",
]
[[package]]
name = "psm"
version = "0.1.26"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6e944464ec8536cd1beb0bbfd96987eb5e3b72f2ecdafdc5c769a37f1fa2ae1f"
dependencies = [
"cc",
]
[[package]]
name = "quote"
version = "1.0.35"
@ -235,12 +311,55 @@ dependencies = [
"bitflags 1.3.2",
]
[[package]]
name = "regex-automata"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59b23e92ee4318893fa3fe3e6fb365258efbfe6ac6ab30f090cdcbb7aa37efa9"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.7.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbb5fb1acd8a1a18b3dd5be62d25485eb770e05afb408a9627d14d451bae12da"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "serde"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8e3592472072e6e22e0a54d5904d9febf8508f65fb8552499a1abc7d1078c3a"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.210"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "243902eda00fad750862fc144cea25caca5e20d615af0a81bee94ca738f1df1f"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "shlex"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
[[package]]
name = "signal-hook"
version = "0.3.17"
@ -286,6 +405,19 @@ version = "1.13.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7"
[[package]]
name = "stacker"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cddb07e32ddb770749da91081d8d0ac3a16f1a569a18b20348cd371f5dead06b"
dependencies = [
"cc",
"cfg-if",
"libc",
"psm",
"windows-sys 0.52.0",
]
[[package]]
name = "strsim"
version = "0.10.0"
@ -329,6 +461,12 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-segmentation"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6ccf251212114b54433ec949fd6a7841275f9ada20dddd2f29e9ceea4501493"
[[package]]
name = "utf8parse"
version = "0.2.1"
@ -505,6 +643,7 @@ checksum = "dff9641d1cd4be8d1a070daf9e3773c5f67e78b4d9d42263020c057706765c04"
name = "zte2"
version = "0.2.0"
dependencies = [
"chumsky",
"clap",
"crossterm",
"slotmap",

View file

@ -8,6 +8,7 @@ clap = { version = "4.4", features = ["derive"] }
slotmap = "1.0"
crossterm = "0.27"
thiserror = "1.0"
chumsky = { version = "0.10.1", features = ["pratt"] }
[profile.dev]
opt-level = 2

265
src/highlight/mod.rs Normal file
View file

@ -0,0 +1,265 @@
use std::{ops::Range, path::Path};
/// Category assigned to a highlighted span of text.
///
/// Each kind is paired with a pattern when a `Highlighter` is built.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum TokenKind {
/// Whitespace. The built-in Rust pattern table in this file has no entry for it.
Whitespace,
/// An identifier.
Ident,
/// A language keyword.
Keyword,
/// A numeric literal.
Number,
/// A type name.
Type,
}
/// A pattern-driven tokenizer that assigns a [`TokenKind`] to spans of text.
pub struct Highlighter {
    /// Compiled patterns, tried in order at every position; the first match wins.
    matchers: Vec<Regex>,
    /// `entries[i]` is the token kind produced when `matchers[i]` matches.
    entries: Vec<TokenKind>,
}

impl Highlighter {
    /// Builds a highlighter from `(kind, pattern)` pairs.
    ///
    /// Patterns are written in this module's own small regex dialect (see
    /// `Regex::parser`) and are tried in the order given.
    ///
    /// # Panics
    ///
    /// Panics if a pattern cannot be parsed.
    pub fn new_from_regex<P: AsRef<str>>(
        patterns: impl IntoIterator<Item = (TokenKind, P)>,
    ) -> Self {
        let (entries, patterns): (_, Vec<_>) = patterns.into_iter().unzip();
        let matchers = patterns
            .iter()
            .map(|p| Regex::parser().parse(p.as_ref()).unwrap())
            .collect();
        Self { entries, matchers }
    }

    /// Picks a highlighter based on the file extension of `file_name`.
    ///
    /// Returns `None` when there is no extension, it is not valid UTF-8, or no
    /// highlighter is defined for it. Only `rs` is currently recognised.
    pub fn from_file_name(file_name: &Path) -> Option<Self> {
        match file_name.extension()?.to_str()? {
            "rs" => Some(Self::rust()),
            _ => None,
        }
    }

    /// The built-in highlighter for Rust source.
    ///
    /// In this module's dialect `[...]` is an ordered alternation of arbitrary
    /// sub-patterns, so the keyword pattern reads "any one of these words".
    pub fn rust() -> Self {
        Self::new_from_regex([
            (
                TokenKind::Keyword,
                r"\b[(pub)(enum)(let)(self)(Self)(fn)(impl)(struct)(use)(if)(while)(for)(loop)(mod)]\b",
            ),
            (TokenKind::Ident, r"[a-z_][A-Za-z0-9_]*"),
            (TokenKind::Type, r"[A-Z_][A-Za-z0-9_]*"),
            (TokenKind::Number, r"[0-9][A-Za-z0-9_]*"),
        ])
    }

    /// Tokenizes `s`, returning byte ranges into `s` paired with their kind.
    ///
    /// Text that no pattern matches is skipped one character at a time and
    /// produces no token. Ranges are returned in increasing order.
    ///
    /// NOTE: every match attempt runs on the remaining suffix of the input, so
    /// a leading `\b` never sees the character before the current position.
    fn highlight_str(&self, mut s: &str) -> Vec<(Range<usize>, TokenKind)> {
        let mut tokens = Vec::new();
        let mut offset = 0;
        while let Some(first) = s.chars().next() {
            let hit = self
                .matchers
                .iter()
                .zip(&self.entries)
                .find_map(|(matcher, &kind)| {
                    let len = matcher.matches(s)?;
                    // An empty match makes no progress; accepting it would spin
                    // forever on the same position, so treat it as "no match".
                    (len > 0).then_some((len, kind))
                });
            let step = match hit {
                Some((len, kind)) => {
                    tokens.push((offset..offset + len, kind));
                    len
                }
                // Nothing matched here: step over a single character.
                None => first.len_utf8(),
            };
            offset += step;
            s = &s[step..];
        }
        tokens
    }

    /// Tokenizes `s` and bundles the result together with this highlighter.
    pub fn highlight(self, s: &str) -> Highlights {
        let tokens = self.highlight_str(s);
        Highlights {
            highlighter: self,
            tokens,
        }
    }
}
/// The tokens computed for one piece of text, plus the highlighter that made them.
pub struct Highlights {
    /// The highlighter that produced `tokens`; kept so the text can be re-highlighted.
    pub highlighter: Highlighter,
    /// Token spans as byte ranges, ordered by start offset.
    tokens: Vec<(Range<usize>, TokenKind)>,
}

impl Highlights {
    /// Placeholder for incremental updates; currently does nothing.
    pub fn insert(&mut self, at: usize, s: &str) {}

    /// Returns the kind of the token covering offset `pos`, if any.
    pub fn get_at(&self, pos: usize) -> Option<TokenKind> {
        let by_start = self
            .tokens
            .binary_search_by_key(&pos, |(span, _)| span.start);
        let candidate = match by_start {
            // A token starts exactly at `pos`.
            Ok(exact) => exact,
            // Otherwise the only possible cover is the token just before the
            // insertion point.
            Err(insertion) => insertion.saturating_sub(1),
        };
        match self.tokens.get(candidate) {
            Some((span, kind)) if span.contains(&pos) => Some(*kind),
            _ => None,
        }
    }
}
/// A node in the small regex dialect used by the highlighter.
#[derive(Clone, Debug)]
pub enum Regex {
    /// One or more ASCII whitespace characters.
    Whitespace,
    /// Zero-width assertion: exactly one side of the current position is a
    /// word character (alphanumeric or `_`).
    WordBoundary,
    /// Any single character in the inclusive range.
    Range(char, char),
    /// Exactly this character.
    Char(char),
    /// Ordered alternation: the first member that matches is taken.
    Set(Vec<Self>),
    /// Sequence: every member must match, one after another.
    Group(Vec<Self>),
    /// Greedy repetition, `Many(at_least, inner)`.
    Many(usize, Box<Self>),
}

/// Matching cursor: `pos` is a byte offset into `s`, always on a char boundary.
struct State<'a> {
    s: &'a str,
    pos: usize,
}

impl State<'_> {
    /// The character at the cursor, if any.
    fn peek(&self) -> Option<char> {
        self.s[self.pos..].chars().next()
    }

    /// The character immediately before the cursor, if any.
    fn prev(&self) -> Option<char> {
        self.s[..self.pos].chars().next_back()
    }

    /// Advances the cursor past one character; does nothing at end of input.
    fn skip(&mut self) {
        if let Some(c) = self.peek() {
            self.pos += c.len_utf8();
        }
    }

    /// Like [`State::go`], but restores the cursor when `r` fails to match.
    fn attempt(&mut self, r: &Regex) -> Option<()> {
        let old_pos = self.pos;
        if self.go(r).is_some() {
            Some(())
        } else {
            self.pos = old_pos;
            None
        }
    }

    /// Matches `r` at the cursor, advancing past what it consumed.
    ///
    /// Returns `None` on failure, in which case the cursor may be left partway
    /// through; use [`State::attempt`] when the position must be preserved.
    fn go(&mut self, r: &Regex) -> Option<()> {
        match r {
            Regex::WordBoundary => {
                let is_word = |c: char| c.is_alphanumeric() || c == '_';
                // The start and end of the input count as non-word characters.
                let before = self.prev().is_some_and(is_word);
                let after = self.peek().is_some_and(is_word);
                (before != after).then_some(())
            }
            Regex::Char(c) => {
                if self.peek()? == *c {
                    self.skip();
                    Some(())
                } else {
                    None
                }
            }
            Regex::Whitespace => {
                let mut once = false;
                while let Some(c) = self.peek() {
                    if !c.is_ascii_whitespace() {
                        break;
                    }
                    self.skip();
                    once = true;
                }
                once.then_some(())
            }
            Regex::Set(xs) => xs.iter().find_map(|x| self.attempt(x)),
            Regex::Group(xs) => {
                for x in xs {
                    self.go(x)?;
                }
                Some(())
            }
            Regex::Range(a, b) => {
                let c = self.peek()?;
                if (*a..=*b).contains(&c) {
                    self.skip();
                    Some(())
                } else {
                    None
                }
            }
            Regex::Many(at_least, x) => {
                let mut times = 0;
                loop {
                    let start = self.pos;
                    if self.attempt(x).is_none() {
                        break;
                    }
                    times += 1;
                    if self.pos == start {
                        // A zero-width match would succeed forever without
                        // progress. It can repeat any number of times, so the
                        // minimum is satisfied; stop here.
                        times = times.max(*at_least);
                        break;
                    }
                }
                (times >= *at_least).then_some(())
            }
        }
    }
}

impl Regex {
    /// Matches this pattern against the start of `s`.
    ///
    /// Returns the number of bytes consumed, or `None` if it does not match.
    fn matches(&self, s: &str) -> Option<usize> {
        let mut s = State { s, pos: 0 };
        s.go(self).map(|_| s.pos)
    }
}
use chumsky::{pratt::postfix, prelude::*};
impl Regex {
/// Builds the parser that turns a pattern string into a [`Regex`] tree.
///
/// Syntax accepted, as implemented below:
/// - an alphanumeric character or `_` is a literal (`Char`);
/// - `x-y`, with both ends literal characters, is a `Range` (recognised
///   anywhere, not only inside brackets);
/// - `\b` is a `WordBoundary`;
/// - `[[:space:]]` is `Whitespace`;
/// - `[...]` wraps zero or more nested expressions into a `Set`;
/// - `(...)` wraps zero or more nested expressions into a `Group`;
/// - a postfix `*` or `+` produces `Many(0, _)` or `Many(1, _)` respectively.
///
/// The whole pattern is returned as one top-level `Group`. There is no
/// escape syntax for other characters, so anything else is a parse error.
fn parser<'a>() -> impl Parser<'a, &'a str, Self, extra::Err<Rich<'a, char>>> {
recursive(|regex| {
// Literal characters are restricted to word characters.
let char_ = any().filter(|c: &char| c.is_alphanumeric() || *c == '_');
let range = char_
.then_ignore(just('-'))
.then(char_)
.map(|(a, b)| Self::Range(a, b));
// Order matters: `range` is tried before a lone literal, and the
// `[[:space:]]` class before the generic `[` ... `]` set.
let atom = choice((
range,
char_.map(Self::Char),
just("\\b").to(Self::WordBoundary),
// Classes
just("[[:space:]]").map(|_| Self::Whitespace),
regex
.clone()
.repeated()
.collect()
.delimited_by(just('['), just(']'))
.map(Regex::Set),
regex
.clone()
.repeated()
.collect()
.delimited_by(just('('), just(')'))
.map(Regex::Group),
));
// Postfix repetition operators applied to an atom.
atom.pratt((
postfix(0, just('*'), |r, _, _| Self::Many(0, Box::new(r))),
postfix(0, just('+'), |r, _, _| Self::Many(1, Box::new(r))),
))
})
// A full pattern is a sequence of expressions matched one after another.
.repeated()
.collect()
.map(Self::Group)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A lone keyword yields exactly one keyword token spanning the input.
    #[test]
    fn simple() {
        let hl = Highlighter::rust().highlight("pub");
        assert_eq!(hl.tokens, vec![(0..3, TokenKind::Keyword)]);
    }

    /// Keywords, identifiers and type names are told apart; unmatched text
    /// (spaces, punctuation) produces no token.
    #[test]
    fn mixed_tokens() {
        let hl = Highlighter::rust().highlight("let x1 = Foo");
        assert_eq!(
            hl.tokens,
            vec![
                (0..3, TokenKind::Keyword),
                (4..6, TokenKind::Ident),
                (9..12, TokenKind::Type),
            ]
        );
    }
}

View file

@ -1,4 +1,5 @@
mod action;
mod highlight;
mod state;
mod terminal;
mod theme;

View file

@ -1,4 +1,8 @@
use crate::{Args, Dir, Error, theme};
use crate::{
Args, Dir, Error,
highlight::{Highlighter, Highlights},
theme,
};
use slotmap::{HopSlotMap, new_key_type};
use std::{
io,
@ -107,19 +111,20 @@ impl Text {
#[derive(Default)]
pub struct Buffer {
pub text: Text,
pub highlights: Option<Highlights>,
pub cursors: HopSlotMap<CursorId, Cursor>,
pub dir: Option<PathBuf>,
pub path: Option<PathBuf>,
pub text: Text,
pub cursors: HopSlotMap<CursorId, Cursor>,
}
impl Buffer {
pub fn from_file(path: PathBuf) -> Result<Self, Error> {
let (dir, chars) = match std::fs::read_to_string(&path) {
let (dir, chars, s) = match std::fs::read_to_string(&path) {
Ok(s) => {
let mut path = path.canonicalize()?;
path.pop();
(Some(path), s.chars().collect())
(Some(path), s.chars().collect(), s)
}
// If the file doesn't exist, create a new file
Err(err) if err.kind() == io::ErrorKind::NotFound => {
@ -128,15 +133,16 @@ impl Buffer {
.filter(|p| p.to_str() != Some(""))
.map(Path::to_owned)
.or_else(|| std::env::current_dir().ok());
(dir, Vec::new())
(dir, Vec::new(), String::new())
}
Err(err) => return Err(err.into()),
};
Ok(Self {
text: Text { chars },
highlights: Highlighter::from_file_name(&path).map(|h| h.highlight(&s)),
cursors: HopSlotMap::default(),
dir,
path: Some(path),
text: Text { chars },
cursors: HopSlotMap::default(),
})
}
@ -149,8 +155,16 @@ impl Buffer {
)
}
/// Recomputes the highlights from the current text, reusing the existing
/// highlighter. Buffers without highlighting are left untouched.
fn update_highlights(&mut self) {
    if let Some(previous) = self.highlights.take() {
        let contents = self.text.to_string();
        self.highlights = Some(previous.highlighter.highlight(&contents));
    }
}
pub fn clear(&mut self) {
self.text.chars.clear();
self.update_highlights();
// Reset cursors
self.cursors.values_mut().for_each(|cursor| {
*cursor = Cursor::default();
@ -226,6 +240,7 @@ impl Buffer {
.insert((pos + n).min(self.text.chars.len()), c);
n += 1;
}
self.update_highlights();
self.cursors.values_mut().for_each(|cursor| {
if cursor.base >= pos {
cursor.base += n;
@ -253,6 +268,7 @@ impl Buffer {
pub fn remove(&mut self, range: Range<usize>) {
// TODO: Bell if false?
self.text.chars.drain(range.clone());
self.update_highlights();
self.cursors.values_mut().for_each(|cursor| {
if cursor.base >= range.start {
cursor.base = cursor

View file

@ -1,4 +1,4 @@
use crate::Color;
use crate::{Color, highlight::TokenKind};
pub struct BorderTheme {
pub left: char,
@ -45,6 +45,12 @@ pub struct Theme {
pub option_dir: Color,
pub option_file: Color,
pub option_new: Color,
pub hl_token_whitespace: Color,
pub hl_token_ident: Color,
pub hl_token_keyword: Color,
pub hl_token_number: Color,
pub hl_token_type: Color,
}
impl Default for Theme {
@ -65,6 +71,24 @@ impl Default for Theme {
option_dir: Color::AnsiValue(178),
option_file: Color::Reset,
option_new: Color::AnsiValue(148),
hl_token_whitespace: Color::Reset,
hl_token_ident: Color::AnsiValue(187),
hl_token_keyword: Color::AnsiValue(46),
hl_token_number: Color::AnsiValue(45),
hl_token_type: Color::AnsiValue(203),
}
}
}
impl Theme {
    /// Returns the foreground colour this theme assigns to `token`.
    pub fn token_color(&self, token: TokenKind) -> Color {
        let Self {
            hl_token_whitespace,
            hl_token_ident,
            hl_token_keyword,
            hl_token_number,
            hl_token_type,
            ..
        } = self;
        *match token {
            TokenKind::Whitespace => hl_token_whitespace,
            TokenKind::Ident => hl_token_ident,
            TokenKind::Keyword => hl_token_keyword,
            TokenKind::Number => hl_token_number,
            TokenKind::Type => hl_token_type,
        }
    }
}

View file

@ -160,7 +160,18 @@ impl Input {
let selected = cursor.selection().map_or(false, |s| s.contains(&pos));
let (fg, c) = match line[coord as usize] {
'\n' if selected => (state.theme.whitespace, '⮠'),
c => (state.theme.text, c),
c => {
if let Some(fg) = buffer
.highlights
.as_ref()
.and_then(|hl| hl.get_at(pos))
.map(|tok| state.theme.token_color(tok))
{
(fg, c)
} else {
(state.theme.text, c)
}
}
};
frame
.with_bg(if !selected {