How a Tiny Compiler Works: From Tokenizer to Code Generator
This article walks through the core concepts and implementation steps of a minimal JavaScript compiler, covering tokenization, parsing into an AST, traversing with a visitor, transforming the tree, and generating target code, while also explaining the visitor pattern and polyfills.
Introduction
In modern front‑end development Babel is used to compile newer ES6+ syntax to widely supported ES5. The article uses the open‑source the‑super‑tiny‑compiler project to illustrate how a compiler works, converting Lisp‑style function calls into C‑style calls.
Compiler Workflow
The compilation process consists of three stages: parsing (lexical analysis and syntactic analysis), transformation, and code generation.
Tokenizer (Lexical Analysis)
function tokenizer(input) {
let current = 0;
let tokens = [];
while (current < input.length) {
let char = input[current];
if (char === '(') {
tokens.push({type: 'paren', value: '('});
current++;
continue;
}
if (char === ')') {
tokens.push({type: 'paren', value: ')'});
current++;
continue;
}
let WHITESPACE = /\s/;
if (WHITESPACE.test(char)) {
current++;
continue;
}
let NUMBERS = /[0-9]/;
if (NUMBERS.test(char)) {
let value = '';
while (NUMBERS.test(char)) {
value += char;
char = input[++current];
}
tokens.push({type: 'number', value});
continue;
}
if (char === '"') {
let value = '';
char = input[++current];
while (char !== '"') {
value += char;
char = input[++current];
}
char = input[++current];
tokens.push({type: 'string', value});
continue;
}
let LETTERS = /[a-z]/i;
if (LETTERS.test(char)) {
let value = '';
while (LETTERS.test(char)) {
value += char;
char = input[++current];
}
tokens.push({type: 'name', value});
continue;
}
throw new TypeError(`I dont know what this character is: ${char}`);
}
return tokens;
}Parser (Syntactic Analysis)
function parser(tokens) {
let current = 0;
function walk() {
let token = tokens[current];
if (token.type === 'number') {
current++;
return {type: 'NumberLiteral', value: token.value};
}
if (token.type === 'string') {
current++;
return {type: 'StringLiteral', value: token.value};
}
if (token.type === 'paren' && token.value === '(') {
token = tokens[++current];
let node = {type: 'CallExpression', name: token.value, params: []};
token = tokens[++current];
while (token.type !== 'paren' || token.value !== ')') {
node.params.push(walk());
token = tokens[current];
}
current++;
return node;
}
throw new TypeError(token.type);
}
let ast = {type: 'Program', body: []};
while (current < tokens.length) {
ast.body.push(walk());
}
return ast;
}Traverser
function traverser(ast, visitor) {
function traverseArray(array, parent) {
array.forEach(child => traverseNode(child, parent));
}
function traverseNode(node, parent) {
let methods = visitor[node.type];
if (methods && methods.enter) methods.enter(node, parent);
switch (node.type) {
case 'Program':
traverseArray(node.body, node);
break;
case 'CallExpression':
traverseArray(node.params, node);
break;
case 'NumberLiteral':
case 'StringLiteral':
break;
default:
throw new TypeError(node.type);
}
if (methods && methods.exit) methods.exit(node, parent);
}
traverseNode(ast, null);
}Transformer
function transformer(ast) {
let newAst = {type: 'Program', body: []};
ast._context = newAst.body;
traverser(ast, {
NumberLiteral: {
enter(node, parent) {
parent._context.push({type: 'NumberLiteral', value: node.value});
}
},
StringLiteral: {
enter(node, parent) {
parent._context.push({type: 'StringLiteral', value: node.value});
}
},
CallExpression: {
enter(node, parent) {
let expression = {
type: 'CallExpression',
callee: {type: 'Identifier', name: node.name},
arguments: []
};
node._context = expression.arguments;
if (parent.type !== 'CallExpression') {
expression = {type: 'ExpressionStatement', expression};
}
parent._context.push(expression);
}
}
});
return newAst;
}Code Generator
function codeGenerator(node) {
switch (node.type) {
case 'Program':
return node.body.map(codeGenerator).join('
');
case 'ExpressionStatement':
return `${codeGenerator(node.expression)};`;
case 'CallExpression':
return `${codeGenerator(node.callee)}(${node.arguments.map(codeGenerator).join(', ')})`;
case 'Identifier':
return node.name;
case 'NumberLiteral':
return node.value;
case 'StringLiteral':
return `"${node.value}"`;
default:
throw new TypeError(node.type);
}
}Visitor Pattern Overview
The visitor pattern separates operations from the object structure they work on. In the compiler, a visitor object provides enter and exit methods for each AST node type, allowing transformation logic to be added without modifying the node classes themselves.
Polyfill Example
(function (window) {
if (window.incompatibleFeature) {
return window.incompatibleFeature;
} else {
window.incompatibleFeature = function () {
// compatibility code
};
}
})(window);Signed-in readers can open the original source through BestHub's protected redirect.
This article has been distilled and summarized from source material, then republished for learning and reference. If you believe it infringes your rights, please contactand we will review it promptly.
How this landed with the community
Was this worth your time?
0 Comments
Thoughtful readers leave field notes, pushback, and hard-won operational detail here.
