// dice_rust/lexer.rs

use crate::error::{ParseError, Position, Span};
use std::fmt;
3
4#[derive(Debug, Clone, PartialEq)]
5pub enum TokenKind {
6    // Literals
7    U32(u32),
8
9    // Operators
10    Dice, // d or D
11
12    // End of file
13    Eof,
14}
15
16impl fmt::Display for TokenKind {
17    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
18        match self {
19            TokenKind::U32(n) => write!(f, "{n}"),
20            TokenKind::Dice => write!(f, "D"),
21            TokenKind::Eof => write!(f, "EOF"),
22        }
23    }
24}
25
/// A lexical token: a [`TokenKind`] paired with the source [`Span`] it
/// covers (span type comes from `crate::error`).
#[derive(Debug, Clone, PartialEq)]
pub struct Token {
    /// What kind of token this is.
    pub kind: TokenKind,
    /// Where in the input the token's text appears.
    pub span: Span,
}
31
32impl Token {
33    pub fn new(kind: TokenKind, span: Span) -> Self {
34        Self { kind, span }
35    }
36}
37
/// Hand-written single-pass lexer over a dice-notation input string.
pub struct Lexer<'a> {
    /// The full input text; token text is sliced back out of this.
    input: &'a str,
    /// Iterator over `(byte offset, char)` pairs of `input`.
    chars: std::str::CharIndices<'a>,
    /// One-character lookahead: the current `(offset, char)`, or `None`
    /// once the input is exhausted.
    current: Option<(usize, char)>,
    /// Line/column/byte-offset of the current character (type from
    /// `crate::error`).
    position: Position,
}
44
45impl<'a> Lexer<'a> {
46    pub fn new(input: &'a str) -> Self {
47        let mut chars = input.char_indices();
48        let current = chars.next();
49
50        Self {
51            input,
52            chars,
53            current,
54            position: Position::new(1, 1, 0),
55        }
56    }
57
58    pub fn lex(&self) -> Result<Vec<Token>, ParseError> {
59        let mut lexer = Lexer::new(self.input);
60        lexer.tokenize()
61    }
62
63    fn current_char(&self) -> Option<char> {
64        self.current.map(|(_, c)| c)
65    }
66
67    fn current_offset(&self) -> usize {
68        self.current.map_or(self.input.len(), |(offset, _)| offset)
69    }
70
71    fn advance(&mut self) -> Option<char> {
72        if let Some((_, c)) = self.current {
73            if c == '\n' {
74                self.position.line += 1;
75                self.position.column = 1;
76            } else {
77                self.position.column += 1;
78            }
79            self.position.offset += c.len_utf8() as u32;
80        }
81
82        self.current = self.chars.next();
83        self.current_char()
84    }
85
86    fn read_identifier(&mut self) -> Result<Token, ParseError> {
87        let start_pos = self.position;
88        let start_offset = self.current_offset();
89
90        while let Some(c) = self.current_char() {
91            if c.is_alphabetic() || c == '_' {
92                self.advance();
93            } else {
94                break;
95            }
96        }
97
98        let end_offset = self.current_offset();
99        let text = &self.input[start_offset..end_offset];
100
101        let kind = match text {
102            "d" | "D" => TokenKind::Dice,
103            _ => {
104                return Err(ParseError::lexical_error(
105                    Span::new(start_pos, self.position),
106                    format!("Invalid identifier: {text}"),
107                ));
108            }
109        };
110
111        Ok(Token::new(kind, Span::new(start_pos, self.position)))
112    }
113
114    fn read_number(&mut self) -> Result<Token, ParseError> {
115        let start_pos = self.position;
116        let start_offset = self.current_offset();
117
118        while let Some(c) = self.current_char() {
119            if c.is_ascii_digit() {
120                self.advance();
121            } else {
122                break;
123            }
124        }
125
126        let end_offset = self.current_offset();
127        let text = &self.input[start_offset..end_offset];
128        match text.parse::<u32>() {
129            Ok(value) => Ok(Token::new(
130                TokenKind::U32(value),
131                Span::new(start_pos, self.position),
132            )),
133            Err(_) => Err(ParseError::invalid_number_literal(
134                Span::new(start_pos, self.position),
135                format!("Invalid number literal: {text}"),
136            )),
137        }
138    }
139
140    pub fn next_token(&mut self) -> Result<Token, ParseError> {
141        let start_pos = self.position;
142
143        match self.current_char() {
144            Some(c) if c.is_ascii_digit() => self.read_number(),
145            Some(c) if c.is_alphabetic() => self.read_identifier(),
146            Some(c) => {
147                self.advance();
148                Err(ParseError::lexical_error(
149                    Span::new(start_pos, self.position),
150                    format!("Unexpected character: {c}"),
151                ))
152            }
153            None => Ok(Token::new(TokenKind::Eof, Span::single(start_pos))),
154        }
155    }
156
157    pub fn tokenize(&mut self) -> Result<Vec<Token>, ParseError> {
158        let mut tokens = Vec::new();
159
160        loop {
161            let token = self.next_token()?;
162            let is_eof = matches!(token.kind, TokenKind::Eof);
163            tokens.push(token);
164
165            if is_eof {
166                break;
167            }
168        }
169
170        Ok(tokens)
171    }
172}