1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
//! Contains the interpreter function along with the InterpreterConstants
//! enumeration, as used in the Interpreter design example.
//-----------------------------------------------------------------------------
use crate::helpers::titlecase;
//-----------------------------------------------------------------------------
/// Represents constants for special characters to be used as interpreted
/// tokens.
///
/// Each variant's discriminant is the raw token value. Tokens are handled as
/// `usize` elsewhere in this module, so the variants are compared after an
/// `as usize` cast; note that `EOL` (-1) wraps to `usize::MAX` under that
/// cast.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum InterpreterConstants {
    /// Period ('.').
    PERIOD = 100,
    /// Question mark ('?').
    QUESTION = 101,
    /// Marker for end of a token list.
    EOL = -1,
}
//-----------------------------------------------------------------------------
/// Vocabulary known to the interpreter: the 40 most common English words
/// (listed by frequency, although the ordering itself is not significant).
/// A word token is simply the index of its entry in this table.
static _COMMONWORDS: [&str; 40] = [
    // tokens 0-9
    "the", "be", "to", "of", "and", "a", "in", "that", "have", "I",
    // tokens 10-19
    "it", "for", "not", "on", "with", "he", "as", "you", "do", "at",
    // tokens 20-29
    "this", "but", "his", "by", "from", "they", "we", "say", "her", "she",
    // tokens 30-39
    "or", "an", "will", "my", "one", "all", "would", "there", "their", "what",
];
/// Translates a single token into the word or punctuation mark it stands
/// for.
///
/// # Parameters
/// - token
///
///   The token to interpret.
///
/// # Returns
/// Returns a string holding the matching word or punctuation mark. A token
/// that is not recognized yields "<UNKNOWN TOKEN #>", where # is the token
/// value.
fn _interpret_token(token: usize) -> String {
    let period = InterpreterConstants::PERIOD as usize;
    let question = InterpreterConstants::QUESTION as usize;

    match _COMMONWORDS.get(token) {
        // Rule 1: a token inside the table's bounds is a word index.
        Some(&word) => word.to_owned(),
        // Rule 1: outside the table, the token may be a PERIOD...
        None if token == period => ".".to_owned(),
        // Rule 1: ...or a QUESTION.
        None if token == question => "?".to_owned(),
        // Rule 1: anything else is reported as an unknown token.
        None => format!("<UNKNOWN TOKEN {token}>"),
    }
}
/// This function is a simple interpreter.
///
/// The interpreter takes a list of integer tokens and converts each token
/// into a word or punctuation mark. The interpreter then arranges the words
/// into a space-separated list in a single string. In other words, the
/// tokens are converted into a sentence, with the first word capitalized and
/// no space between the last two "words" under the assumption the last word
/// is actually a punctuation mark.
///
/// Interpreter Rules:
/// 1. Each token must be in the range of 0 through 39 (maximum number of
///    words known by the interpreter) or must be 100 ('.') or 101 ('?').
/// 2. The word corresponding to the first token is always capitalized.
/// 3. A single space appears between each word.
/// 4. No space appears between the last two tokens.
///
/// # Parameters
/// - token_list
///
///   List of integer tokens to be interpreted. The list is normally
///   terminated by EOL (-1, which is `usize::MAX` once cast to `usize`).
///   Anything after the first EOL is ignored. If no EOL is present, the end
///   of the slice is treated as the end of the list.
///
/// # Returns
/// Returns a new String containing the result of the interpretation. An
/// empty list (or one that starts with EOL) produces an empty string.
pub fn interpreter_interpret(token_list: &[usize]) -> String {
    let eol = InterpreterConstants::EOL as usize;

    // Work only on the tokens before the terminator. Falling back to the
    // slice length avoids indexing past the end when EOL is missing, and
    // cutting at the first EOL keeps trailing data from affecting spacing.
    let token_count = token_list
        .iter()
        .position(|&token| token == eol)
        .unwrap_or(token_list.len());
    let tokens = &token_list[..token_count];

    let mut output = String::new();
    for (token_index, &token) in tokens.iter().enumerate() {
        let mut token_as_string = _interpret_token(token);
        if token_index == 0 {
            // Rule 2: First word in sentence gets capitalized according to
            // local rules.
            token_as_string = titlecase::titlecase(&token_as_string);
        }
        output.push_str(&token_as_string);

        // Rule 3: Separate all words by a single space...
        // Rule 4: ...except between the last two tokens (and never after
        // the final one), i.e. only while at least two tokens still follow.
        if token_index + 2 < tokens.len() {
            output.push(' ');
        }
    }
    output
}