1use crate::error::{ParseError, Position, Span};
2use std::fmt;
3
/// The category of a lexed token, together with any literal payload.
///
/// `Eq` is derived alongside `PartialEq` because the only payload is a
/// `u32`, for which equality is total; this lets token kinds be used
/// wherever a full equivalence relation is required.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TokenKind {
    /// An unsigned integer literal that parsed successfully as a `u32`.
    U32(u32),

    /// The dice operator, written `d` or `D` in the input.
    Dice,
    /// Marker produced once the input has been exhausted.
    Eof,
}
15
16impl fmt::Display for TokenKind {
17 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
18 match self {
19 TokenKind::U32(n) => write!(f, "{n}"),
20 TokenKind::Dice => write!(f, "D"),
21 TokenKind::Eof => write!(f, "EOF"),
22 }
23 }
24}
25
26#[derive(Debug, Clone, PartialEq)]
27pub struct Token {
28 pub kind: TokenKind,
29 pub span: Span,
30}
31
32impl Token {
33 pub fn new(kind: TokenKind, span: Span) -> Self {
34 Self { kind, span }
35 }
36}
37
38pub struct Lexer<'a> {
39 input: &'a str,
40 chars: std::str::CharIndices<'a>,
41 current: Option<(usize, char)>,
42 position: Position,
43}
44
45impl<'a> Lexer<'a> {
46 pub fn new(input: &'a str) -> Self {
47 let mut chars = input.char_indices();
48 let current = chars.next();
49
50 Self {
51 input,
52 chars,
53 current,
54 position: Position::new(1, 1, 0),
55 }
56 }
57
58 pub fn lex(&self) -> Result<Vec<Token>, ParseError> {
59 let mut lexer = Lexer::new(self.input);
60 lexer.tokenize()
61 }
62
63 fn current_char(&self) -> Option<char> {
64 self.current.map(|(_, c)| c)
65 }
66
67 fn current_offset(&self) -> usize {
68 self.current.map_or(self.input.len(), |(offset, _)| offset)
69 }
70
71 fn advance(&mut self) -> Option<char> {
72 if let Some((_, c)) = self.current {
73 if c == '\n' {
74 self.position.line += 1;
75 self.position.column = 1;
76 } else {
77 self.position.column += 1;
78 }
79 self.position.offset += c.len_utf8() as u32;
80 }
81
82 self.current = self.chars.next();
83 self.current_char()
84 }
85
86 fn read_identifier(&mut self) -> Result<Token, ParseError> {
87 let start_pos = self.position;
88 let start_offset = self.current_offset();
89
90 while let Some(c) = self.current_char() {
91 if c.is_alphabetic() || c == '_' {
92 self.advance();
93 } else {
94 break;
95 }
96 }
97
98 let end_offset = self.current_offset();
99 let text = &self.input[start_offset..end_offset];
100
101 let kind = match text {
102 "d" | "D" => TokenKind::Dice,
103 _ => {
104 return Err(ParseError::lexical_error(
105 Span::new(start_pos, self.position),
106 format!("Invalid identifier: {text}"),
107 ));
108 }
109 };
110
111 Ok(Token::new(kind, Span::new(start_pos, self.position)))
112 }
113
114 fn read_number(&mut self) -> Result<Token, ParseError> {
115 let start_pos = self.position;
116 let start_offset = self.current_offset();
117
118 while let Some(c) = self.current_char() {
119 if c.is_ascii_digit() {
120 self.advance();
121 } else {
122 break;
123 }
124 }
125
126 let end_offset = self.current_offset();
127 let text = &self.input[start_offset..end_offset];
128 match text.parse::<u32>() {
129 Ok(value) => Ok(Token::new(
130 TokenKind::U32(value),
131 Span::new(start_pos, self.position),
132 )),
133 Err(_) => Err(ParseError::invalid_number_literal(
134 Span::new(start_pos, self.position),
135 format!("Invalid number literal: {text}"),
136 )),
137 }
138 }
139
140 pub fn next_token(&mut self) -> Result<Token, ParseError> {
141 let start_pos = self.position;
142
143 match self.current_char() {
144 Some(c) if c.is_ascii_digit() => self.read_number(),
145 Some(c) if c.is_alphabetic() => self.read_identifier(),
146 Some(c) => {
147 self.advance();
148 Err(ParseError::lexical_error(
149 Span::new(start_pos, self.position),
150 format!("Unexpected character: {c}"),
151 ))
152 }
153 None => Ok(Token::new(TokenKind::Eof, Span::single(start_pos))),
154 }
155 }
156
157 pub fn tokenize(&mut self) -> Result<Vec<Token>, ParseError> {
158 let mut tokens = Vec::new();
159
160 loop {
161 let token = self.next_token()?;
162 let is_eof = matches!(token.kind, TokenKind::Eof);
163 tokens.push(token);
164
165 if is_eof {
166 break;
167 }
168 }
169
170 Ok(tokens)
171 }
172}