Fundamentals 17 min read

How a Tiny Compiler Works: From Tokenizer to Code Generator

This article walks through the core concepts and implementation steps of a minimal JavaScript compiler, covering tokenization, parsing into an AST, traversing with a visitor, transforming the tree, and generating target code, while also explaining the visitor pattern and polyfills.

ELab Team
ELab Team
ELab Team
How a Tiny Compiler Works: From Tokenizer to Code Generator

Introduction

In modern front‑end development Babel is used to compile newer ES6+ syntax to widely supported ES5. The article uses the open‑source the‑super‑tiny‑compiler project to illustrate how a compiler works, converting Lisp‑style function calls into C‑style calls.

Compiler Workflow

The compilation process consists of three stages: parsing (lexical analysis and syntactic analysis), transformation, and code generation.

Tokenizer (Lexical Analysis)

function tokenizer(input) {
  let current = 0;
  let tokens = [];
  while (current < input.length) {
    let char = input[current];
    if (char === '(') {
      tokens.push({type: 'paren', value: '('});
      current++;
      continue;
    }
    if (char === ')') {
      tokens.push({type: 'paren', value: ')'});
      current++;
      continue;
    }
    let WHITESPACE = /\s/;
    if (WHITESPACE.test(char)) {
      current++;
      continue;
    }
    let NUMBERS = /[0-9]/;
    if (NUMBERS.test(char)) {
      let value = '';
      while (NUMBERS.test(char)) {
        value += char;
        char = input[++current];
      }
      tokens.push({type: 'number', value});
      continue;
    }
    if (char === '"') {
      let value = '';
      char = input[++current];
      while (char !== '"') {
        value += char;
        char = input[++current];
      }
      char = input[++current];
      tokens.push({type: 'string', value});
      continue;
    }
    let LETTERS = /[a-z]/i;
    if (LETTERS.test(char)) {
      let value = '';
      while (LETTERS.test(char)) {
        value += char;
        char = input[++current];
      }
      tokens.push({type: 'name', value});
      continue;
    }
    throw new TypeError(`I dont know what this character is: ${char}`);
  }
  return tokens;
}

Parser (Syntactic Analysis)

function parser(tokens) {
  let current = 0;
  function walk() {
    let token = tokens[current];
    if (token.type === 'number') {
      current++;
      return {type: 'NumberLiteral', value: token.value};
    }
    if (token.type === 'string') {
      current++;
      return {type: 'StringLiteral', value: token.value};
    }
    if (token.type === 'paren' && token.value === '(') {
      token = tokens[++current];
      let node = {type: 'CallExpression', name: token.value, params: []};
      token = tokens[++current];
      while (token.type !== 'paren' || token.value !== ')') {
        node.params.push(walk());
        token = tokens[current];
      }
      current++;
      return node;
    }
    throw new TypeError(token.type);
  }
  let ast = {type: 'Program', body: []};
  while (current < tokens.length) {
    ast.body.push(walk());
  }
  return ast;
}

Traverser

function traverser(ast, visitor) {
  function traverseArray(array, parent) {
    array.forEach(child => traverseNode(child, parent));
  }
  function traverseNode(node, parent) {
    let methods = visitor[node.type];
    if (methods && methods.enter) methods.enter(node, parent);
    switch (node.type) {
      case 'Program':
        traverseArray(node.body, node);
        break;
      case 'CallExpression':
        traverseArray(node.params, node);
        break;
      case 'NumberLiteral':
      case 'StringLiteral':
        break;
      default:
        throw new TypeError(node.type);
    }
    if (methods && methods.exit) methods.exit(node, parent);
  }
  traverseNode(ast, null);
}

Transformer

function transformer(ast) {
  let newAst = {type: 'Program', body: []};
  ast._context = newAst.body;
  traverser(ast, {
    NumberLiteral: {
      enter(node, parent) {
        parent._context.push({type: 'NumberLiteral', value: node.value});
      }
    },
    StringLiteral: {
      enter(node, parent) {
        parent._context.push({type: 'StringLiteral', value: node.value});
      }
    },
    CallExpression: {
      enter(node, parent) {
        let expression = {
          type: 'CallExpression',
          callee: {type: 'Identifier', name: node.name},
          arguments: []
        };
        node._context = expression.arguments;
        if (parent.type !== 'CallExpression') {
          expression = {type: 'ExpressionStatement', expression};
        }
        parent._context.push(expression);
      }
    }
  });
  return newAst;
}

Code Generator

function codeGenerator(node) {
  switch (node.type) {
    case 'Program':
      return node.body.map(codeGenerator).join('
');
    case 'ExpressionStatement':
      return `${codeGenerator(node.expression)};`;
    case 'CallExpression':
      return `${codeGenerator(node.callee)}(${node.arguments.map(codeGenerator).join(', ')})`;
    case 'Identifier':
      return node.name;
    case 'NumberLiteral':
      return node.value;
    case 'StringLiteral':
      return `"${node.value}"`;
    default:
      throw new TypeError(node.type);
  }
}

Visitor Pattern Overview

The visitor pattern separates operations from the object structure they work on. In the compiler, a visitor object provides enter and exit methods for each AST node type, allowing transformation logic to be added without modifying the node classes themselves.

Polyfill Example

(function (window) {
  if (window.incompatibleFeature) {
    return window.incompatibleFeature;
  } else {
    window.incompatibleFeature = function () {
      // compatibility code
    };
  }
})(window);
Compiler workflow diagram
Compiler workflow diagram
Visitor pattern diagram
Visitor pattern diagram
Original Source

Signed-in readers can open the original source through BestHub's protected redirect.

Sign in to view source
Republication Notice

This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactadmin@besthub.devand we will review it promptly.

JavaScriptASTCompilerParserVisitor Pattern
ELab Team
Written by

ELab Team

Sharing fresh technical insights

0 followers
Reader feedback

How this landed with the community

Sign in to like

Rate this article

Was this worth your time?

Sign in to rate
Discussion

0 Comments

Thoughtful readers leave field notes, pushback, and hard-won operational detail here.