diff --git a/README.md b/README.md index 0cce1769..17c420c2 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,7 @@ directly and efficiently as possible to WASM GC features. Zena is not ready for use! -Zena is very early in it' development and may things are changing, including +Zena is very early in its development and many things are changing, including syntax, and defaults for immutability, etc. Many features are partially implemented, and there are likely lots of hidden bugs in the features that are implemented. @@ -495,7 +495,7 @@ we are trying to answer with Zena include: Zena is not yet released. To build from source: ```bash -git clone https://github.com/nicolo-ribaudo/zena.git +git clone https://github.com/elematic/zena.git cd zena npm install npm run build @@ -504,9 +504,8 @@ npm test ### Prerequisites -- Node.js v25+ -- npm -- [wasmtime](https://wasmtime.dev/) (for running WASI programs) +- Node.js, Deno, or Bun +- A WebAssembly runtime with WASI support (for running WASI programs) ## License diff --git a/packages/zena-compiler/package.json b/packages/zena-compiler/package.json index ec6a7073..01d61671 100644 --- a/packages/zena-compiler/package.json +++ b/packages/zena-compiler/package.json @@ -31,7 +31,7 @@ "clean": "if-file-deleted" }, "test": { - "command": "node --experimental-wasm-exnref ../cli/lib/cli.js test 'zena/*_test.zena'", + "command": "node --experimental-wasm-exnref ../cli/lib/cli.js test 'zena/test/*_test.zena'", "files": [ "zena/**/*.zena" ], diff --git a/packages/zena-compiler/zena/lib/tokenizer.zena b/packages/zena-compiler/zena/lib/tokenizer.zena new file mode 100644 index 00000000..4e9a4a92 --- /dev/null +++ b/packages/zena-compiler/zena/lib/tokenizer.zena @@ -0,0 +1,808 @@ +// Tokenizer for the Zena programming language +// Self-hosted compiler implementation + +import { Array } from 'zena:array'; +import { Map } from 'zena:map'; +import { StringReader } from 'zena:string-reader'; +import { StringBuilder } from 'zena:string-builder'; 
+ +// ============================================================================ +// Token Types +// ============================================================================ + +export enum TokenType { + // Keywords + Let, + Var, + Class, + Import, + Export, + Return, + If, + Else, + While, + For, + Break, + Continue, + True, + False, + Null, + New, + This, + Extends, + Interface, + Implements, + Final, + Super, + Mixin, + With, + On, + Abstract, + Operator, + Declare, + Function, + From, + Type, + Distinct, + As, + Is, + Extension, + Static, + Throw, + Try, + Catch, + Finally, + Match, + Case, + Symbol, + Enum, + In, + Inline, + + // Identifiers & Literals + Identifier, + Number, + String, + + // Template literals + NoSubstitutionTemplate, + TemplateHead, + TemplateMiddle, + TemplateTail, + + // Operators + Equals, + EqualsEquals, + EqualsEqualsEquals, + Bang, + BangEquals, + BangEqualsEquals, + Less, + LessEquals, + LessLess, + Greater, + GreaterEquals, + GreaterGreater, + GreaterGreaterGreater, + Arrow, + Plus, + Minus, + Star, + Slash, + Percent, + Pipe, + PipePipe, + PipeGreater, + Ampersand, + AmpersandAmpersand, + Caret, + Question, + + // Punctuation + LParen, + RParen, + LBrace, + RBrace, + LBracket, + RBracket, + Colon, + Semi, + Comma, + Dot, + DotDot, + DotDotDot, + Hash, + At, + Dollar, + + EOF, + Unknown +} + +// ============================================================================ +// Token +// ============================================================================ + +export class Token { + let kind: TokenType; + let value: string; + let rawValue: string | null; + let line: i32; + let column: i32; + let start: i32; + let end: i32; + + new(t: TokenType, v: string, l: i32, c: i32, s: i32, e: i32, r: string | null) : kind = t, value = v, rawValue = r, line = l, column = c, start = s, end = e { } +} + +// ============================================================================ +// Keywords +// 
============================================================================ + +let KEYWORDS: Map = { + 'let' => TokenType.Let, + 'var' => TokenType.Var, + 'class' => TokenType.Class, + 'final' => TokenType.Final, + 'import' => TokenType.Import, + 'export' => TokenType.Export, + 'return' => TokenType.Return, + 'if' => TokenType.If, + 'else' => TokenType.Else, + 'while' => TokenType.While, + 'for' => TokenType.For, + 'break' => TokenType.Break, + 'continue' => TokenType.Continue, + 'true' => TokenType.True, + 'false' => TokenType.False, + 'null' => TokenType.Null, + 'new' => TokenType.New, + 'this' => TokenType.This, + 'extends' => TokenType.Extends, + 'interface' => TokenType.Interface, + 'implements' => TokenType.Implements, + 'super' => TokenType.Super, + 'mixin' => TokenType.Mixin, + 'with' => TokenType.With, + 'on' => TokenType.On, + 'abstract' => TokenType.Abstract, + 'operator' => TokenType.Operator, + 'declare' => TokenType.Declare, + 'function' => TokenType.Function, + 'from' => TokenType.From, + 'type' => TokenType.Type, + 'distinct' => TokenType.Distinct, + 'as' => TokenType.As, + 'is' => TokenType.Is, + 'extension' => TokenType.Extension, + 'static' => TokenType.Static, + 'throw' => TokenType.Throw, + 'try' => TokenType.Try, + 'catch' => TokenType.Catch, + 'finally' => TokenType.Finally, + 'match' => TokenType.Match, + 'case' => TokenType.Case, + 'symbol' => TokenType.Symbol, + 'enum' => TokenType.Enum, + 'in' => TokenType.In, + 'inline' => TokenType.Inline +}; + +// ============================================================================ +// Character helpers +// ============================================================================ + +let isWhitespace = (c: i32): boolean => { + // space=32, tab=9, newline=10, carriage return=13 + return c == 32 || c == 9 || c == 10 || c == 13; +}; + +let isDigit = (c: i32): boolean => { + // '0'=48, '9'=57 + return c >= 48 && c <= 57; +}; + +let isHexDigit = (c: i32): boolean => { + // '0'-'9'=48-57, 
'a'-'f'=97-102, 'A'-'F'=65-70 + return (c >= 48 && c <= 57) || (c >= 97 && c <= 102) || (c >= 65 && c <= 70); +}; + +let isAlpha = (c: i32): boolean => { + // 'a'-'z'=97-122, 'A'-'Z'=65-90 + return (c >= 97 && c <= 122) || (c >= 65 && c <= 90); +}; + +let isIdentifierStart = (c: i32): boolean => { + // letters, underscore=95, $=36 + return isAlpha(c) || c == 95 || c == 36; +}; + +let isIdentifierPart = (c: i32): boolean => { + return isAlpha(c) || isDigit(c) || c == 95 || c == 36; +}; + +// ============================================================================ +// Tokenizer +// ============================================================================ + +export class Tokenizer { + let #reader: StringReader; + let #tokens: Array; + let #templateStack: Array; + #line: i32; + #column: i32; + #lineStart: i32; + + new(source: string) + : #reader = new StringReader(source), + #tokens = new Array(64), + #templateStack = new Array(8) + { + this.#line = 1; + this.#column = 1; + this.#lineStart = 0; + } + + // ============================================================================ + // Cursor helpers + // ============================================================================ + + #peek(): i32 { + return this.#reader.peekByte(); + } + + #peekNext(): i32 { + return this.#reader.peekByteAt(1); + } + + #advance(): i32 { + let c = this.#reader.advanceByte(); + if (c == 10) { + // newline + this.#line = this.#line + 1; + this.#lineStart = this.#reader.position; + this.#column = 1; + } else { + this.#column = this.#column + 1; + } + return c; + } + + #currentColumn(): i32 { + return this.#reader.position - this.#lineStart + 1; + } + + #addToken(type: TokenType, value: string, startPos: i32, startCol: i32): void { + let token = new Token(type, value, this.#line, startCol, startPos, this.#reader.position, null); + this.#tokens.push(token); + } + + #addTemplateToken(type: TokenType, cooked: string, raw: string, startPos: i32, startCol: i32): void { + let token = new 
Token(type, cooked, this.#line, startCol, startPos, this.#reader.position, raw); + this.#tokens.push(token); + } + + // ============================================================================ + // Main tokenize entry point + // ============================================================================ + + tokenize(): Array { + while (!this.#reader.isAtEnd) { + this.#scanToken(); + } + + // Add EOF token + let pos = this.#reader.position; + this.#addToken(TokenType.EOF, '', pos, this.#currentColumn()); + return this.#tokens; + } + + #scanToken(): void { + let startPos = this.#reader.position; + let startCol = this.#currentColumn(); + let c = this.#peek(); + + // Whitespace + if (isWhitespace(c)) { + this.#advance(); + return; + } + + // Numbers + if (isDigit(c)) { + this.#scanNumber(startPos, startCol); + return; + } + + // Identifiers & Keywords + if (isIdentifierStart(c)) { + this.#scanIdentifier(startPos, startCol); + return; + } + + // Strings + if (c == 39 || c == 34) { + // single quote=39, double quote=34 + this.#scanString(startPos, startCol); + return; + } + + // Template literals + if (c == 96) { + // backtick=96 + this.#advance(); + this.#scanTemplatePart(false, startPos, startCol); + return; + } + + // Operators and punctuation + this.#scanOperator(startPos, startCol); + } + + // ============================================================================ + // Number scanning + // ============================================================================ + + #scanNumber(startPos: i32, startCol: i32): void { + let c = this.#peek(); + let builder = new StringBuilder(); + + // Check for hex literal (0x or 0X) + if (c == 48 && (this.#peekNext() == 120 || this.#peekNext() == 88)) { + // '0'=48, 'x'=120, 'X'=88 + builder.appendByte(this.#advance()); // '0' + builder.appendByte(this.#advance()); // 'x' or 'X' + + while (!this.#reader.isAtEnd && isHexDigit(this.#peek())) { + builder.appendByte(this.#advance()); + } + } else { + // Decimal number + while 
(!this.#reader.isAtEnd && isDigit(this.#peek())) { + builder.appendByte(this.#advance()); + } + + // Fractional part + if (this.#peek() == 46 && isDigit(this.#peekNext())) { + // '.'=46 + builder.appendByte(this.#advance()); // '.' + while (!this.#reader.isAtEnd && isDigit(this.#peek())) { + builder.appendByte(this.#advance()); + } + } + } + + this.#addToken(TokenType.Number, builder.toString(), startPos, startCol); + } + + // ============================================================================ + // Identifier scanning + // ============================================================================ + + #scanIdentifier(startPos: i32, startCol: i32): void { + let start = this.#reader.mark(); + + while (!this.#reader.isAtEnd && isIdentifierPart(this.#peek())) { + this.#advance(); + } + + let value = this.#reader.sliceFrom(start); + let (tokenType, found) = KEYWORDS.get(value); + if (found) { + this.#addToken(tokenType, value, startPos, startCol); + } else { + this.#addToken(TokenType.Identifier, value, startPos, startCol); + } + } + + // ============================================================================ + // String scanning + // ============================================================================ + + #scanString(startPos: i32, startCol: i32): void { + let quote = this.#advance(); + let builder = new StringBuilder(); + + while (!this.#reader.isAtEnd && this.#peek() != quote) { + if (this.#peek() == 92) { + // backslash=92 + this.#advance(); + if (this.#reader.isAtEnd) { + break; + } + let escaped = this.#advance(); + // 'n'=110, 'r'=114, 't'=116, '\\'=92, '\''=39, '"'=34 + if (escaped == 110) { + builder.appendByte(10); // newline + } else if (escaped == 114) { + builder.appendByte(13); // carriage return + } else if (escaped == 116) { + builder.appendByte(9); // tab + } else if (escaped == 92) { + builder.appendByte(92); // backslash + } else if (escaped == 39) { + builder.appendByte(39); // single quote + } else if (escaped == 34) { + 
builder.appendByte(34); // double quote + } else { + // Unknown escape - keep as-is + builder.appendByte(92); + builder.appendByte(escaped); + } + } else { + builder.appendByte(this.#advance()); + } + } + + if (!this.#reader.isAtEnd) { + this.#advance(); // closing quote + } + + this.#addToken(TokenType.String, builder.toString(), startPos, startCol); + } + + // ============================================================================ + // Template literal scanning + // ============================================================================ + + #scanTemplatePart(isMiddleOrTail: boolean, startPos: i32, startCol: i32): void { + let cooked = new StringBuilder(); + let raw = new StringBuilder(); + + while (!this.#reader.isAtEnd) { + let c = this.#peek(); + + // End of template + if (c == 96) { + // backtick=96 + this.#advance(); + var tokenType = TokenType.NoSubstitutionTemplate; + if (isMiddleOrTail) { + tokenType = TokenType.TemplateTail; + } + this.#addTemplateToken(tokenType, cooked.toString(), raw.toString(), startPos, startCol); + return; + } + + // Template expression start + if (c == 36 && this.#peekNext() == 123) { + // '$'=36, '{'=123 + this.#advance(); // $ + this.#advance(); // { + this.#templateStack.push(1); // start tracking braces + var tokenType = TokenType.TemplateHead; + if (isMiddleOrTail) { + tokenType = TokenType.TemplateMiddle; + } + this.#addTemplateToken(tokenType, cooked.toString(), raw.toString(), startPos, startCol); + return; + } + + // Escape sequence + if (c == 92) { + // backslash=92 + raw.appendByte(this.#advance()); + if (!this.#reader.isAtEnd) { + let next = this.#peek(); + raw.appendByte(next); + this.#processEscape(cooked); + } + } else { + let ch = this.#advance(); + cooked.appendByte(ch); + raw.appendByte(ch); + } + } + + // Reached end without closing + var tokenType = TokenType.NoSubstitutionTemplate; + if (isMiddleOrTail) { + tokenType = TokenType.TemplateTail; + } + this.#addTemplateToken(tokenType, cooked.toString(), 
raw.toString(), startPos, startCol); + } + + #processEscape(builder: StringBuilder): void { + if (this.#reader.isAtEnd) { + return; + } + let escaped = this.#advance(); + // 'n'=110, 'r'=114, 't'=116, '\\'=92, '`'=96, '$'=36, '0'=48 + if (escaped == 110) { + builder.appendByte(10); // newline + } else if (escaped == 114) { + builder.appendByte(13); // carriage return + } else if (escaped == 116) { + builder.appendByte(9); // tab + } else if (escaped == 92) { + builder.appendByte(92); // backslash + } else if (escaped == 96) { + builder.appendByte(96); // backtick + } else if (escaped == 36) { + builder.appendByte(36); // dollar + } else if (escaped == 48) { + builder.appendByte(0); // null + } else { + // Unknown escape - keep as-is + builder.appendByte(92); + builder.appendByte(escaped); + } + } + + // ============================================================================ + // Operator scanning + // ============================================================================ + + #scanOperator(startPos: i32, startCol: i32): void { + let c = this.#advance(); + + // '='=61, '>'=62 + if (c == 61) { + if (this.#peek() == 62) { + this.#advance(); + this.#addToken(TokenType.Arrow, '=>', startPos, startCol); + } else if (this.#peek() == 61) { + this.#advance(); + if (this.#peek() == 61) { + this.#advance(); + this.#addToken(TokenType.EqualsEqualsEquals, '===', startPos, startCol); + } else { + this.#addToken(TokenType.EqualsEquals, '==', startPos, startCol); + } + } else { + this.#addToken(TokenType.Equals, '=', startPos, startCol); + } + return; + } + + // '!'=33 + if (c == 33) { + if (this.#peek() == 61) { + this.#advance(); + if (this.#peek() == 61) { + this.#advance(); + this.#addToken(TokenType.BangEqualsEquals, '!==', startPos, startCol); + } else { + this.#addToken(TokenType.BangEquals, '!=', startPos, startCol); + } + } else { + this.#addToken(TokenType.Bang, '!', startPos, startCol); + } + return; + } + + // '<'=60 + if (c == 60) { + if (this.#peek() == 61) 
{ + this.#advance(); + this.#addToken(TokenType.LessEquals, '<=', startPos, startCol); + } else if (this.#peek() == 60) { + this.#advance(); + this.#addToken(TokenType.LessLess, '<<', startPos, startCol); + } else { + this.#addToken(TokenType.Less, '<', startPos, startCol); + } + return; + } + + // '>'=62 + if (c == 62) { + if (this.#peek() == 61) { + this.#advance(); + this.#addToken(TokenType.GreaterEquals, '>=', startPos, startCol); + } else if (this.#peek() == 62) { + this.#advance(); + if (this.#peek() == 62) { + this.#advance(); + this.#addToken(TokenType.GreaterGreaterGreater, '>>>', startPos, startCol); + } else { + this.#addToken(TokenType.GreaterGreater, '>>', startPos, startCol); + } + } else { + this.#addToken(TokenType.Greater, '>', startPos, startCol); + } + return; + } + + // '+'=43 + if (c == 43) { + this.#addToken(TokenType.Plus, '+', startPos, startCol); + return; + } + + // '-'=45 + if (c == 45) { + this.#addToken(TokenType.Minus, '-', startPos, startCol); + return; + } + + // '*'=42 + if (c == 42) { + this.#addToken(TokenType.Star, '*', startPos, startCol); + return; + } + + // '/'=47 + if (c == 47) { + if (this.#peek() == 47) { + // Single-line comment + while (!this.#reader.isAtEnd && this.#peek() != 10) { + this.#advance(); + } + return; + } else if (this.#peek() == 42) { + // Multi-line comment + this.#advance(); // consume '*' + while (!this.#reader.isAtEnd) { + if (this.#peek() == 42 && this.#peekNext() == 47) { + this.#advance(); // consume '*' + this.#advance(); // consume '/' + break; + } + this.#advance(); + } + return; + } else { + this.#addToken(TokenType.Slash, '/', startPos, startCol); + } + return; + } + + // '%'=37 + if (c == 37) { + this.#addToken(TokenType.Percent, '%', startPos, startCol); + return; + } + + // '|'=124 + if (c == 124) { + if (this.#peek() == 124) { + this.#advance(); + this.#addToken(TokenType.PipePipe, '||', startPos, startCol); + } else if (this.#peek() == 62) { + this.#advance(); + 
this.#addToken(TokenType.PipeGreater, '|>', startPos, startCol); + } else { + this.#addToken(TokenType.Pipe, '|', startPos, startCol); + } + return; + } + + // '&'=38 + if (c == 38) { + if (this.#peek() == 38) { + this.#advance(); + this.#addToken(TokenType.AmpersandAmpersand, '&&', startPos, startCol); + } else { + this.#addToken(TokenType.Ampersand, '&', startPos, startCol); + } + return; + } + + // '^'=94 + if (c == 94) { + this.#addToken(TokenType.Caret, '^', startPos, startCol); + return; + } + + // '?'=63 + if (c == 63) { + this.#addToken(TokenType.Question, '?', startPos, startCol); + return; + } + + // '('=40 + if (c == 40) { + this.#addToken(TokenType.LParen, '(', startPos, startCol); + return; + } + + // ')'=41 + if (c == 41) { + this.#addToken(TokenType.RParen, ')', startPos, startCol); + return; + } + + // '{'=123 + if (c == 123) { + // Track brace depth for template expressions + if (this.#templateStack.length > 0) { + let lastIdx = this.#templateStack.length - 1; + let depth = this.#templateStack[lastIdx]; + this.#templateStack[lastIdx] = depth + 1; + } + this.#addToken(TokenType.LBrace, '{', startPos, startCol); + return; + } + + // '}'=125 + if (c == 125) { + // Check if this closes a template expression + if (this.#templateStack.length > 0) { + let lastIdx = this.#templateStack.length - 1; + let depth = this.#templateStack[lastIdx]; + if (depth == 1) { + this.#templateStack.pop(); + // Continue scanning template + this.#scanTemplatePart(true, startPos, startCol); + return; + } else { + this.#templateStack[lastIdx] = depth - 1; + } + } + this.#addToken(TokenType.RBrace, '}', startPos, startCol); + return; + } + + // '['=91 + if (c == 91) { + this.#addToken(TokenType.LBracket, '[', startPos, startCol); + return; + } + + // ']'=93 + if (c == 93) { + this.#addToken(TokenType.RBracket, ']', startPos, startCol); + return; + } + + // ':'=58 + if (c == 58) { + this.#addToken(TokenType.Colon, ':', startPos, startCol); + return; + } + + // ';'=59 + if (c == 
59) { + this.#addToken(TokenType.Semi, ';', startPos, startCol); + return; + } + + // ','=44 + if (c == 44) { + this.#addToken(TokenType.Comma, ',', startPos, startCol); + return; + } + + // '.'=46 + if (c == 46) { + if (this.#peek() == 46 && this.#peekNext() == 46) { + this.#advance(); + this.#advance(); + this.#addToken(TokenType.DotDotDot, '...', startPos, startCol); + } else if (this.#peek() == 46) { + this.#advance(); + this.#addToken(TokenType.DotDot, '..', startPos, startCol); + } else { + this.#addToken(TokenType.Dot, '.', startPos, startCol); + } + return; + } + + // '#'=35 + if (c == 35) { + this.#addToken(TokenType.Hash, '#', startPos, startCol); + return; + } + + // '@'=64 + if (c == 64) { + this.#addToken(TokenType.At, '@', startPos, startCol); + return; + } + + // Unknown character + let unknown = new StringBuilder(); + unknown.appendByte(c); + this.#addToken(TokenType.Unknown, unknown.toString(), startPos, startCol); + } +} + +// ============================================================================ +// Convenience function +// ============================================================================ + +export let tokenize = (source: string): Array => { + let tokenizer = new Tokenizer(source); + return tokenizer.tokenize(); +}; diff --git a/packages/zena-compiler/zena/hello_test.zena b/packages/zena-compiler/zena/test/hello_test.zena similarity index 100% rename from packages/zena-compiler/zena/hello_test.zena rename to packages/zena-compiler/zena/test/hello_test.zena diff --git a/packages/zena-compiler/zena/test/tokenizer_test.zena b/packages/zena-compiler/zena/test/tokenizer_test.zena new file mode 100644 index 00000000..d3c6f5dc --- /dev/null +++ b/packages/zena-compiler/zena/test/tokenizer_test.zena @@ -0,0 +1,473 @@ +// Tokenizer tests +// Ported from packages/compiler/src/test/lexer/ + +import { suite, test, TestContext } from 'zena:test'; +import { equal, isTrue } from 'zena:assert'; +import { Array } from 'zena:array'; +import { 
tokenize, Token, TokenType } from '../lib/tokenizer.zena'; + +// ============================================================================ +// Test Helpers +// ============================================================================ + +// Helper to check token type and value +let assertToken = (token: Token, expectedType: TokenType, expectedValue: string): void => { + equal(token.kind as i32, expectedType as i32); + equal(token.value, expectedValue); +}; + +// Helper to check just token type +let assertTokenType = (token: Token, expectedType: TokenType): void => { + equal(token.kind as i32, expectedType as i32); +}; + +// ============================================================================ +// Tests +// ============================================================================ + +export let tests = suite('Tokenizer', (): void => { + + // ========================================================================== + // Hex Literals + // ========================================================================== + + suite('Hex Literals', (): void => { + test('should tokenize hex integer literals', (ctx: TestContext): void => { + let input = '0x123 0XABC 0x0 0xFF'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Number, '0x123'); + assertToken(tokens[1], TokenType.Number, '0XABC'); + assertToken(tokens[2], TokenType.Number, '0x0'); + assertToken(tokens[3], TokenType.Number, '0xFF'); + assertTokenType(tokens[4], TokenType.EOF); + + equal(tokens.length, 5); + }); + + test('should tokenize hex integer literals mixed with other tokens', (ctx: TestContext): void => { + let input = 'let x = 0x1A;'; + let tokens = tokenize(input); + + assertTokenType(tokens[0], TokenType.Let); + assertToken(tokens[1], TokenType.Identifier, 'x'); + assertTokenType(tokens[2], TokenType.Equals); + assertToken(tokens[3], TokenType.Number, '0x1A'); + assertTokenType(tokens[4], TokenType.Semi); + assertTokenType(tokens[5], TokenType.EOF); + + equal(tokens.length, 
6); + }); + }); + + // ========================================================================== + // Basic Tokens + // ========================================================================== + + suite('Basic Tokens', (): void => { + test('should tokenize keywords', (ctx: TestContext): void => { + let input = 'let var class if else while for return'; + let tokens = tokenize(input); + + assertTokenType(tokens[0], TokenType.Let); + assertTokenType(tokens[1], TokenType.Var); + assertTokenType(tokens[2], TokenType.Class); + assertTokenType(tokens[3], TokenType.If); + assertTokenType(tokens[4], TokenType.Else); + assertTokenType(tokens[5], TokenType.While); + assertTokenType(tokens[6], TokenType.For); + assertTokenType(tokens[7], TokenType.Return); + assertTokenType(tokens[8], TokenType.EOF); + + equal(tokens.length, 9); + }); + + test('should tokenize identifiers', (ctx: TestContext): void => { + let input = 'foo bar_baz $test _private'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Identifier, 'foo'); + assertToken(tokens[1], TokenType.Identifier, 'bar_baz'); + assertToken(tokens[2], TokenType.Identifier, '$test'); + assertToken(tokens[3], TokenType.Identifier, '_private'); + assertTokenType(tokens[4], TokenType.EOF); + + equal(tokens.length, 5); + }); + + test('should tokenize decimal numbers', (ctx: TestContext): void => { + let input = '123 456 0 42'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Number, '123'); + assertToken(tokens[1], TokenType.Number, '456'); + assertToken(tokens[2], TokenType.Number, '0'); + assertToken(tokens[3], TokenType.Number, '42'); + assertTokenType(tokens[4], TokenType.EOF); + + equal(tokens.length, 5); + }); + + test('should tokenize floating point numbers', (ctx: TestContext): void => { + let input = '3.14 0.5 123.456'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Number, '3.14'); + assertToken(tokens[1], TokenType.Number, '0.5'); + assertToken(tokens[2], 
TokenType.Number, '123.456'); + assertTokenType(tokens[3], TokenType.EOF); + + equal(tokens.length, 4); + }); + }); + + // ========================================================================== + // Strings + // ========================================================================== + + suite('Strings', (): void => { + test('should tokenize single-quoted strings', (ctx: TestContext): void => { + let input = "'hello' 'world'"; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.String, 'hello'); + assertToken(tokens[1], TokenType.String, 'world'); + assertTokenType(tokens[2], TokenType.EOF); + + equal(tokens.length, 3); + }); + + test('should tokenize double-quoted strings', (ctx: TestContext): void => { + let input = '"hello" "world"'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.String, 'hello'); + assertToken(tokens[1], TokenType.String, 'world'); + assertTokenType(tokens[2], TokenType.EOF); + + equal(tokens.length, 3); + }); + + test('should handle escape sequences', (ctx: TestContext): void => { + // Test newline escape + let input = '"hello\\nworld"'; + let tokens = tokenize(input); + // The value should contain an actual newline character + equal(tokens[0].value.length, 11); // "hello" + newline + "world" + assertTokenType(tokens[0], TokenType.String); + }); + }); + + // ========================================================================== + // Operators + // ========================================================================== + + suite('Operators', (): void => { + test('should tokenize comparison operators', (ctx: TestContext): void => { + let input = '== != === !== < > <= >='; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.EqualsEquals, '=='); + assertToken(tokens[1], TokenType.BangEquals, '!='); + assertToken(tokens[2], TokenType.EqualsEqualsEquals, '==='); + assertToken(tokens[3], TokenType.BangEqualsEquals, '!=='); + assertToken(tokens[4], TokenType.Less, '<'); + 
assertToken(tokens[5], TokenType.Greater, '>'); + assertToken(tokens[6], TokenType.LessEquals, '<='); + assertToken(tokens[7], TokenType.GreaterEquals, '>='); + assertTokenType(tokens[8], TokenType.EOF); + + equal(tokens.length, 9); + }); + + test('should tokenize arithmetic operators', (ctx: TestContext): void => { + let input = '+ - * / %'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Plus, '+'); + assertToken(tokens[1], TokenType.Minus, '-'); + assertToken(tokens[2], TokenType.Star, '*'); + assertToken(tokens[3], TokenType.Slash, '/'); + assertToken(tokens[4], TokenType.Percent, '%'); + assertTokenType(tokens[5], TokenType.EOF); + + equal(tokens.length, 6); + }); + + test('should tokenize logical operators', (ctx: TestContext): void => { + let input = '&& || !'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.AmpersandAmpersand, '&&'); + assertToken(tokens[1], TokenType.PipePipe, '||'); + assertToken(tokens[2], TokenType.Bang, '!'); + assertTokenType(tokens[3], TokenType.EOF); + + equal(tokens.length, 4); + }); + + test('should tokenize bitwise operators', (ctx: TestContext): void => { + let input = '& | ^ << >> >>>'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Ampersand, '&'); + assertToken(tokens[1], TokenType.Pipe, '|'); + assertToken(tokens[2], TokenType.Caret, '^'); + assertToken(tokens[3], TokenType.LessLess, '<<'); + assertToken(tokens[4], TokenType.GreaterGreater, '>>'); + assertToken(tokens[5], TokenType.GreaterGreaterGreater, '>>>'); + assertTokenType(tokens[6], TokenType.EOF); + + equal(tokens.length, 7); + }); + + test('should tokenize arrow', (ctx: TestContext): void => { + let input = '=>'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Arrow, '=>'); + assertTokenType(tokens[1], TokenType.EOF); + + equal(tokens.length, 2); + }); + + test('should tokenize pipe greater', (ctx: TestContext): void => { + let input = '|>'; + let tokens = tokenize(input); + + 
assertToken(tokens[0], TokenType.PipeGreater, '|>'); + assertTokenType(tokens[1], TokenType.EOF); + + equal(tokens.length, 2); + }); + }); + + // ========================================================================== + // Punctuation + // ========================================================================== + + suite('Punctuation', (): void => { + test('should tokenize brackets and braces', (ctx: TestContext): void => { + let input = '( ) { } [ ]'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.LParen, '('); + assertToken(tokens[1], TokenType.RParen, ')'); + assertToken(tokens[2], TokenType.LBrace, '{'); + assertToken(tokens[3], TokenType.RBrace, '}'); + assertToken(tokens[4], TokenType.LBracket, '['); + assertToken(tokens[5], TokenType.RBracket, ']'); + assertTokenType(tokens[6], TokenType.EOF); + + equal(tokens.length, 7); + }); + + test('should tokenize delimiters', (ctx: TestContext): void => { + let input = ': ; , .'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Colon, ':'); + assertToken(tokens[1], TokenType.Semi, ';'); + assertToken(tokens[2], TokenType.Comma, ','); + assertToken(tokens[3], TokenType.Dot, '.'); + assertTokenType(tokens[4], TokenType.EOF); + + equal(tokens.length, 5); + }); + + test('should tokenize dot variations', (ctx: TestContext): void => { + let input = '. .. 
...'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Dot, '.'); + assertToken(tokens[1], TokenType.DotDot, '..'); + assertToken(tokens[2], TokenType.DotDotDot, '...'); + assertTokenType(tokens[3], TokenType.EOF); + + equal(tokens.length, 4); + }); + + test('should tokenize hash and at', (ctx: TestContext): void => { + let input = '# @'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.Hash, '#'); + assertToken(tokens[1], TokenType.At, '@'); + assertTokenType(tokens[2], TokenType.EOF); + + equal(tokens.length, 3); + }); + }); + + // ========================================================================== + // Comments + // ========================================================================== + + suite('Comments', (): void => { + test('should skip single-line comments', (ctx: TestContext): void => { + let input = 'let x // this is a comment += 5'; + let tokens = tokenize(input); + + assertTokenType(tokens[0], TokenType.Let); + assertToken(tokens[1], TokenType.Identifier, 'x'); + assertToken(tokens[2], TokenType.Equals, '='); + assertToken(tokens[3], TokenType.Number, '5'); + assertTokenType(tokens[4], TokenType.EOF); + + equal(tokens.length, 5); + }); + + test('should skip multi-line comments', (ctx: TestContext): void => { + let input = 'let /* comment */ x = 5'; + let tokens = tokenize(input); + + assertTokenType(tokens[0], TokenType.Let); + assertToken(tokens[1], TokenType.Identifier, 'x'); + assertToken(tokens[2], TokenType.Equals, '='); + assertToken(tokens[3], TokenType.Number, '5'); + assertTokenType(tokens[4], TokenType.EOF); + + equal(tokens.length, 5); + }); + }); + + // ========================================================================== + // Template Literals + // ========================================================================== + + suite('Template Literals', (): void => { + test('should tokenize simple template literal', (ctx: TestContext): void => { + let input = '`hello world`'; + let 
tokens = tokenize(input); + + assertToken(tokens[0], TokenType.NoSubstitutionTemplate, 'hello world'); + assertTokenType(tokens[1], TokenType.EOF); + + equal(tokens.length, 2); + }); + + test('should tokenize template literal with expression', (ctx: TestContext): void => { + // Note: the template literal sits inside a single-quoted string, so its backticks are written unescaped + let input = '`hello ${name}!`'; + let tokens = tokenize(input); + + // TemplateHead: "hello " (text before the first substitution) + assertToken(tokens[0], TokenType.TemplateHead, 'hello '); + // Identifier: name (the expression inside the substitution) + assertToken(tokens[1], TokenType.Identifier, 'name'); + // TemplateTail: "!" (text after the last substitution) + assertToken(tokens[2], TokenType.TemplateTail, '!'); + assertTokenType(tokens[3], TokenType.EOF); + + equal(tokens.length, 4); + }); + + test('should tokenize template literal with multiple expressions', (ctx: TestContext): void => { + let input = '`${a}+${b}=${c}`'; + let tokens = tokenize(input); + + assertToken(tokens[0], TokenType.TemplateHead, ''); + assertToken(tokens[1], TokenType.Identifier, 'a'); + assertToken(tokens[2], TokenType.TemplateMiddle, '+'); + assertToken(tokens[3], TokenType.Identifier, 'b'); + assertToken(tokens[4], TokenType.TemplateMiddle, '='); + assertToken(tokens[5], TokenType.Identifier, 'c'); + assertToken(tokens[6], TokenType.TemplateTail, ''); + assertTokenType(tokens[7], TokenType.EOF); + + equal(tokens.length, 8); + }); + }); + + // ========================================================================== + // Complex Expressions + // ========================================================================== + + suite('Complex Expressions', (): void => { + test('should tokenize function declaration', (ctx: TestContext): void => { + let input = 'const add = (a: i32, b: i32) => a + b;'; + let tokens = tokenize(input); + + // const -> Identifier (not a keyword in Zena lexer) + assertToken(tokens[0], TokenType.Identifier, 'const'); + assertToken(tokens[1], TokenType.Identifier, 'add'); + assertToken(tokens[2], TokenType.Equals, '='); + assertToken(tokens[3], 
TokenType.LParen, '('); + assertToken(tokens[4], TokenType.Identifier, 'a'); + assertToken(tokens[5], TokenType.Colon, ':'); + assertToken(tokens[6], TokenType.Identifier, 'i32'); + assertToken(tokens[7], TokenType.Comma, ','); + assertToken(tokens[8], TokenType.Identifier, 'b'); + assertToken(tokens[9], TokenType.Colon, ':'); + assertToken(tokens[10], TokenType.Identifier, 'i32'); + assertToken(tokens[11], TokenType.RParen, ')'); + assertToken(tokens[12], TokenType.Arrow, '=>'); + assertToken(tokens[13], TokenType.Identifier, 'a'); + assertToken(tokens[14], TokenType.Plus, '+'); + assertToken(tokens[15], TokenType.Identifier, 'b'); + assertToken(tokens[16], TokenType.Semi, ';'); + assertTokenType(tokens[17], TokenType.EOF); + + equal(tokens.length, 18); + }); + + test('should tokenize class declaration', (ctx: TestContext): void => { + let input = 'class Point { x: i32; y: i32; }'; + let tokens = tokenize(input); + + assertTokenType(tokens[0], TokenType.Class); + assertToken(tokens[1], TokenType.Identifier, 'Point'); + assertToken(tokens[2], TokenType.LBrace, '{'); + assertToken(tokens[3], TokenType.Identifier, 'x'); + assertToken(tokens[4], TokenType.Colon, ':'); + assertToken(tokens[5], TokenType.Identifier, 'i32'); + assertToken(tokens[6], TokenType.Semi, ';'); + assertToken(tokens[7], TokenType.Identifier, 'y'); + assertToken(tokens[8], TokenType.Colon, ':'); + assertToken(tokens[9], TokenType.Identifier, 'i32'); + assertToken(tokens[10], TokenType.Semi, ';'); + assertToken(tokens[11], TokenType.RBrace, '}'); + assertTokenType(tokens[12], TokenType.EOF); + + equal(tokens.length, 13); + }); + }); + + // ========================================================================== + // Token Positions + // ========================================================================== + + suite('Token Positions', (): void => { + test('should track line and column correctly', (ctx: TestContext): void => { + let input = 'let x +let y'; + let tokens = tokenize(input); + + 
// First line: let x (line and column are expected to be 1-based; 'x' sits at column 5) + equal(tokens[0].line, 1); + equal(tokens[0].column, 1); + equal(tokens[1].line, 1); + equal(tokens[1].column, 5); + + // Second line: let y (line advances to 2 and column restarts at 1 after the newline) + equal(tokens[2].line, 2); + equal(tokens[2].column, 1); + equal(tokens[3].line, 2); + equal(tokens[3].column, 5); + }); + + test('should track start and end positions', (ctx: TestContext): void => { + let input = 'let foo = 123;'; + let tokens = tokenize(input); + + // 'let' starts at offset 0 and ends at offset 3 (end is exclusive: one past the last character) + equal(tokens[0].start, 0); + equal(tokens[0].end, 3); + + // 'foo' starts at offset 4 and ends at offset 7 (end is exclusive) + equal(tokens[1].start, 4); + equal(tokens[1].end, 7); + }); + }); +});