iota_transactional_test_runner/programmable_transaction_test_parser/
token.rs

// Copyright (c) Mysten Labs, Inc.
// Modifications Copyright (c) 2024 IOTA Stiftung
// SPDX-License-Identifier: Apache-2.0
4
5use std::fmt::{self, Display};
6
7use anyhow::bail;
8use move_core_types::{identifier, parsing::parser::Token};
9
/// Token kinds recognised by the lexer implemented in this file
/// (see the `Token` impl for `CommandToken`).
#[derive(Eq, PartialEq, Debug, Clone, Copy)]
pub enum CommandToken {
    /// A run of ASCII whitespace.
    Whitespace,
    /// A `// ...` line comment or a `/* ... */` block comment.
    Comment,
    /// The literal `//>` marker.
    CommandStart,
    /// An identifier: starts with an ASCII letter or `_`, followed by
    /// identifier characters.
    Ident,
    /// A number: starts with an ASCII digit, followed by digits or `_`.
    Number,
    /// `::`
    ColonColon,
    /// `:`
    Colon,
    /// `,`
    Comma,
    /// `;`
    Semi,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `(`
    LParen,
    /// `)`
    RParen,
    /// `<...>`: the whole balanced angle-bracket span, including the outer
    /// `<` and `>`, kept as one token so it can be passed to a different parser.
    TypeArgString,
    /// Uninhabited token: the lexer in this file never produces it.
    Void,
}
44
// Keyword spellings exported for the rest of the parser. Their use is outside
// this file; presumably they are compared against `Ident` token text — confirm
// against the callers.

/// Keyword `TransferObjects`.
pub const TRANSFER_OBJECTS: &str = "TransferObjects";
/// Keyword `SplitCoins`.
pub const SPLIT_COINS: &str = "SplitCoins";
/// Keyword `MergeCoins`.
pub const MERGE_COINS: &str = "MergeCoins";
/// Keyword `MakeMoveVec`.
pub const MAKE_MOVE_VEC: &str = "MakeMoveVec";
/// Keyword `Publish`.
pub const PUBLISH: &str = "Publish";
/// Keyword `Upgrade`.
pub const UPGRADE: &str = "Upgrade";
/// Keyword `Gas`.
pub const GAS_COIN: &str = "Gas";
/// Keyword `Input`.
pub const INPUT: &str = "Input";
/// Keyword `Result`.
pub const RESULT: &str = "Result";
/// Keyword `NestedResult`.
pub const NESTED_RESULT: &str = "NestedResult";
55
56impl Display for CommandToken {
57    fn fmt(&self, formatter: &mut fmt::Formatter<'_>) -> Result<(), fmt::Error> {
58        let s = match *self {
59            CommandToken::Whitespace => "[whitespace]",
60            CommandToken::Comment => "[comment]",
61            CommandToken::Ident => "[identifier]",
62            CommandToken::Number => "[num]",
63            CommandToken::CommandStart => "//>",
64            CommandToken::ColonColon => "::",
65            CommandToken::Colon => ":",
66            CommandToken::Comma => ",",
67            CommandToken::Semi => ";",
68            CommandToken::LBracket => "[",
69            CommandToken::RBracket => "]",
70            CommandToken::LParen => "(",
71            CommandToken::RParen => ")",
72            CommandToken::TypeArgString => "<...>",
73            CommandToken::Void => "[void]",
74        };
75        fmt::Display::fmt(s, formatter)
76    }
77}
78
79impl Token for CommandToken {
80    fn is_whitespace(&self) -> bool {
81        matches!(self, Self::Whitespace | Self::Comment | Self::Void)
82    }
83
84    fn next_token(s: &str) -> anyhow::Result<Option<(Self, usize)>> {
85        // parses a string where start matches end.
86        // performs simple matching for start/end pairs
87
88        // type arguments get delegated to a different parser
89        if s.starts_with('<') {
90            let len = parse_sub_token_string(s, "<", ">")?;
91            return Ok(Some((Self::TypeArgString, len)));
92        }
93        // start of a command
94        if s.starts_with("//>") {
95            return Ok(Some((Self::CommandStart, 3)));
96        }
97        // comments
98        if let Some(after) = s.strip_prefix("//") {
99            let mut n = 2;
100            let mut in_whitespace_from_start = true;
101            for c in after.chars() {
102                n += 1;
103                if c == '\n' {
104                    break;
105                }
106                if in_whitespace_from_start && c == '>' {
107                    bail!("Remove whitespace between // and > to start a command");
108                }
109                if !c.is_whitespace() {
110                    in_whitespace_from_start = false;
111                }
112            }
113            return Ok(Some((Self::Comment, n)));
114        }
115        if s.starts_with("/*") {
116            let end = parse_sub_token_string(s, "/*", "*/")?;
117            return Ok(Some((Self::Comment, end)));
118        }
119
120        // other tokens
121        let mut chars = s.chars().peekable();
122        let c = match chars.next() {
123            None => return Ok(None),
124            Some(c) => c,
125        };
126        Ok(Some(match c {
127            '(' => (Self::LParen, 1),
128            ')' => (Self::RParen, 1),
129            '[' => (Self::LBracket, 1),
130            ']' => (Self::RBracket, 1),
131            ',' => (Self::Comma, 1),
132            ';' => (Self::Semi, 1),
133            ':' if matches!(chars.peek(), Some(':')) => (Self::ColonColon, 2),
134            ':' => (Self::Colon, 1),
135            c if c.is_ascii_whitespace() => {
136                // c + remaining
137                let len = 1 + chars.take_while(char::is_ascii_whitespace).count();
138                (Self::Whitespace, len)
139            }
140            c if c.is_ascii_digit() => {
141                // c + remaining
142                let len = 1 + chars
143                    .take_while(|c| char::is_ascii_digit(c) || *c == '_')
144                    .count();
145                (CommandToken::Number, len)
146            }
147            c if c.is_ascii_alphabetic() || c == '_' => {
148                // c + remaining
149                let len = 1 + chars
150                    .take_while(|c| identifier::is_valid_identifier_char(*c))
151                    .count();
152                (Self::Ident, len)
153            }
154            _ => bail!("unrecognized token: {}", s),
155        }))
156    }
157}
158
159fn parse_sub_token_string(mut s: &str, start: &str, end: &str) -> anyhow::Result<usize> {
160    // the length of the string until the matching end
161    let mut len = 0;
162    let start_len = start.len();
163    let end_len = end.len();
164    // the count of number of active start/end pairs
165    let mut count = 0i32;
166    loop {
167        s = if s.is_empty() {
168            bail!("Unexpected end of string after '{start}'. Expected matching '{end}'")
169        } else if let Some(next) = s.strip_prefix(start) {
170            len += start_len;
171            // new start
172            count += 1;
173            next
174        } else if let Some(next) = s.strip_prefix(end) {
175            len += end_len;
176            // an end
177            count -= 1;
178            if count == 0 {
179                // end found
180                break;
181            }
182            next
183        } else {
184            len += 1;
185            &s[1..]
186        }
187    }
188    Ok(len)
189}