HTML5 animation using a texture atlas


Your browser has no canvas control support!
In last week’s tutorial we used the canvas control to make a character’s eyes follow the mouse. This week Nelly the elephant is back again. This time we will add some animations and use a texture atlas.

Texture atlas

Combining all the graphics in one texture atlas gives a performance boost and makes things more manageable. (A texture atlas is a similar idea to a sprite sheet, except the images are better packed to use less space.) I used our in-house tool to create the atlas from the original artwork and this is the result:

Each image is packed into one big image. The tool also creates a xml file which tells us where in the atlas each image is located:

  
    
    
   ....
  

The location and size of each image is stored, as well as an “offset”. The offset is used to “pad out” each frame, without wasting any space.
The atlas xml file is parsed using XMLHttpRequest:

// Kick off an asynchronous GET for the atlas description xml.
// xmlHandler fires on every readyState change and parses the
// document once the full response has arrived.
var client = new XMLHttpRequest();
client.onreadystatechange = xmlHandler;
client.open("GET", "http://astronautz.com/wordpress/atlas.xml");
client.send();
...
// Callback for the atlas XMLHttpRequest. Once the document has fully
// arrived (readyState 4, HTTP status 200 — or 0 when loaded from disk)
// every <image> element is turned into an AtlasImage, stored in the
// global atlasMap under its "name" attribute, and init2() continues
// the setup. Failures are reported via alert().
function xmlHandler() 
{
	if (this.readyState != 4)
		return;
	if (this.status != 200 && this.status != 0)
	{
		alert("this.status = " + this.status);
		return;
	}
	if (this.responseXML == null)
	{
		alert("this.responseXML == null");
		return;
	}
	var imageElems = this.responseXML.getElementsByTagName("image");
	if (imageElems == null ) return;
	for (var i = 0; i < imageElems.length; i++)
	{
		var entry = new AtlasImage();
		entry.load(imageElems[i]);
		atlasMap[imageElems[i].getAttribute("name")] = entry;
	}
	init2();
}

We create an AtlasImage object for each image and store it in a map, with the image name as the key.

// One sub-image ("frame") inside the texture atlas: the frame's rectangle
// within the atlas bitmap, plus an optional x/y offset used to "pad out"
// the frame back to its original size without storing transparent borders.
function AtlasImage()
{
	this.m_x;
	this.m_y;
	this.m_width;
	this.m_height;
	this.m_xOffset;
	this.m_yOffset;
	// Reads this frame's rectangle from one <image> element of the atlas xml.
	this.load = function(elem)
	{
		// Always pass radix 10: without it, older engines parsed values
		// with a leading zero (e.g. "08") as octal and returned 0.
		this.m_x = parseInt(elem.getAttribute("x"), 10); 
		this.m_y = parseInt(elem.getAttribute("y"), 10); 
		this.m_width = parseInt(elem.getAttribute("width"), 10);
		this.m_height = parseInt(elem.getAttribute("height"), 10);
		// offset is an optional parameter
		if (elem.getAttribute("xOffset"))
			this.m_xOffset = parseInt(elem.getAttribute("xOffset"), 10);
		else
			this.m_xOffset = 0;
		if (elem.getAttribute("yOffset"))
			this.m_yOffset = parseInt(elem.getAttribute("yOffset"), 10);
		else
			this.m_yOffset = 0;
	}
	// Draws this frame at (x, y) on the global canvas context, copying the
	// frame's rectangle out of the global atlas image and shifting it by
	// the padding offset.
	this.render = function(x, y)
	{
		context.drawImage(atlas, this.m_x, this.m_y,
			this.m_width, this.m_height, 
			this.m_xOffset+x, this.m_yOffset+y, 
			this.m_width, this.m_height);  
	}
};

There are 4 animations: ear flap, trunk swing, blink and standing still. The Animation class controls the individual animations:

// One animation sequence (e.g. ear flap): a list of AtlasImage frames
// played back at a fixed frame rate. m_moveEyes tells the main loop
// whether the eye sprites may be drawn on top of this animation.
function Animation()
{
	// Playback rate in frames per second.
	// Previously hard-coded as 12 in both isFinished() and update().
	var FPS = 12;
	this.m_currFrame;
	this.m_age;
	this.m_listFrame = [];
	this.m_moveEyes = true;

	// True once the age (ms) has reached the sequence's total duration.
	this.isFinished = function()
	{
		return this.m_age >= this.m_listFrame.length*1000/FPS;
	}
	// Rewinds playback to the first frame.
	this.start = function()
	{
		this.m_age = 0;
		this.m_currFrame = 0;
	}
	// Resolves a list of atlas image names into frames.
	// listIndex = array of keys into the global atlasMap.
	this.init = function(listIndex)
	{
		this.start();
		// Reset the frame list so calling init() twice doesn't
		// accumulate duplicate frames.
		this.m_listFrame = [];
		var image;
		for (var n = 0; n < listIndex.length; n++)
		{
			image = atlasMap[listIndex[n]];
			if (image)
				this.m_listFrame.push(image);
			else alert("missing image:"+listIndex[n]);
		}
	}

	// Advances playback. timeElapsed = milliseconds since the last update.
	this.update = function(timeElapsed)
	{
		this.m_age += timeElapsed;
		this.m_currFrame = Math.floor(this.m_age/1000*FPS);
		// Clamp so a long frame delta can't index past the last frame.
		if (this.m_currFrame >= this.m_listFrame.length)
			this.m_currFrame = this.m_listFrame.length-1;
	}
	// Draws the current frame at the canvas origin.
	this.render = function()
	{
		this.m_listFrame[this.m_currFrame].render(0, 0);  
	}
};

You need to pass it a list of images that compose the animation sequence:

// Build the ear-flap sequence; frames are atlas image names and may repeat
// (the flap plays forward then back through the same frames).
var earFlap = new Animation();
	earFlap.init(["base", "earflap02", "earflap04", "earflap06", 
		"earflap08", "earflap06", "earflap04", "earflap02", "base"]);
	// The whole head moves in this animation, so don't overlay the eye sprites.
	earFlap.m_moveEyes = false;
	listAnim.push(earFlap);

Note: sometimes we use the same images several times in the same animation to save space.
For some animation we don’t want the eyes to move, because the head moves in the animation. For these cases we set m_moveEyes = false.

The current animation is stored in a global variable currAnim. In the main game loop once an animation stops we randomly select a new one, by setting currAnim to a new animation:

// Main render loop (driven by requestAnimFrame). Advances and draws the
// current animation and, when it finishes, randomly picks the next one.
function render() 
{  
	// Milliseconds since the previous frame.
	var timeElapsed = new Date().getTime() - lastRender;
	lastRender = new Date().getTime();
	if (currAnim.isFinished())
	{
		// Roll a 100-sided die: indices < listAnim.length pick that
		// animation, anything else (the vast majority of rolls) falls
		// through to the stand-still animation.
		var randNum = Math.floor(Math.random()*100);
		if (randNum < listAnim.length)
		{
			currAnim = listAnim[randNum];
		}
		else 
		{
			// make it show stand still animation most of the time
			currAnim = standStill;
		}
		currAnim.start();
	}
	context.clearRect(0, 0, canvasWidth, canvasHeight);
	currAnim.update(timeElapsed);
	currAnim.render();  
	// Eyes are drawn on top only when the animation doesn't move the head.
	if (currAnim.m_moveEyes == true)
	{
		eyeRight.update();
		eyeLeft.update();
		eyeRight.render();
		eyeLeft.render();
	}
	requestAnimFrame(render);  
}  

That’s all. Full source code can be found here.

Posted in CodeProject, web development | Tagged , , , , | Comments Off on HTML5 animation using a texture atlas

HTML5 Eyes that follow the mouse


Your browser has no canvas control support!
I made a cute flash movie of an elephant ages ago. Among other things the eyes of the elephant followed the cursor. Unfortunately subsequent flash security updates broke the functionality (if you want mouse move events outside the flash control you need special permissions). So, I decided to convert it to HTML5. The result you can see to the left of this paragraph. (Note: if you see nothing, it’s because you have an old version of IE.).

How it’s done

I use an onload event handler somewhere in the html file which calls the init() function. E.g.:



In the init() function the images are loaded and the canvas resized and then the render loop is started via requestAnimFrame. All images are stored in the one png (this is known as an image atlas).

// Entry point for the elephant demo (called from the page's onload).
// Grabs the canvas, loads the atlas image and — once it has arrived —
// sizes the canvas, configures both eyes and starts the render loop.
function init()
{
	canvas = document.getElementById('nelly');
	if (canvas.getContext)
	{
		context = canvas.getContext('2d');
	}
	else
	{
		// No canvas support (old IE) — nothing we can do.
		return;
	}
	logElement = document.getElementById('log');
	atlas = new Image();  
	// Attach onload BEFORE assigning src: some browsers fire the load
	// event synchronously for cached images, and a handler attached
	// afterwards would never run.
	atlas.onload = function()
	{ 
		window.addEventListener ("mousemove", getCoords, true);
		// Walk the offsetParent chain to get the canvas' absolute
		// position on the page (used to convert mouse coordinates).
		xCanvas = canvas.offsetLeft;
		yCanvas = canvas.offsetTop;
		var elem = canvas.offsetParent;
		while (elem)
		{
			xCanvas += elem.offsetLeft;
			yCanvas += elem.offsetTop;
			elem = elem.offsetParent;
		}

		backWidth = 97;
		backHeight = 150;
		canvas.width = backWidth;
		canvas.height = backHeight;
		// Eye geometry: pupil size, movement bounds (canvas coords) and
		// the pupil graphic's location inside the atlas image.
		eyeRight.setSize(4, 4);
		eyeRight.setMin(44, 30);
		eyeRight.setMax(54, 52);
		eyeRight.setAtlas(97, 0);
		eyeRight.init();
		eyeLeft.setSize(4, 4);
		eyeLeft.setMin(34, 30);
		eyeLeft.setMax(39, 52);
		eyeLeft.setAtlas(97, 0);
		eyeLeft.init();
		requestAnimFrame(render);
	};  
	atlas.src = 'http://astronautz.com/wordpress/nelly0.png';
}

Note, that I create a event handler which stores the mouse coordinates in two global variables:

// Latest mouse position in document coordinates (updated by getCoords).
var xMouse = 0;
var yMouse = 0;
...
// mousemove handler: stores the cursor position in the xMouse/yMouse
// globals, converted from viewport coordinates to document coordinates.
function getCoords(event) 
{
	// Add the scroll offset on BOTH axes; previously only clientY was
	// corrected, so the eyes mis-tracked on horizontally scrolled pages.
	xMouse = event.clientX + window.pageXOffset;
	yMouse = event.clientY + window.pageYOffset;
};
...
window.addEventListener ("mousemove", getCoords, true);

Each eye is controlled by an Eye object. It takes a min, max as parameters:

It also needs the size of the eye image and its position within the image atlas before you initialise:

// Example eye setup: pupil size in pixels, movement bounds (min/max
// canvas coordinates) and the pupil graphic's location within the atlas.
eyeRight.setSize(4, 4);
eyeRight.setMin(44, 30);
eyeRight.setMax(54, 52);
eyeRight.setAtlas(97, 0);
eyeRight.init();

The eye movement is calculated by getting the slope of the angle between the eyeball center and the current mouse position. Then you need to calculate the position taking into account the quadrant:

	// Repositions the pupil to look toward the mouse cursor.
	// xMouse/yMouse are document coordinates, xCanvas/yCanvas the canvas'
	// absolute page position, and (m_xOrig, m_yOrig) the eye's rest centre.
	// NOTE(review): each axis is offset by the slope taken against the
	// OTHER axis' difference and then clamped to the [min, max] box, so
	// the tracking is an approximation rather than a true angle
	// calculation — confirm against the original Flash movie before
	// "fixing" it.
	this.update = function()
	{
		var xDiff = xMouse-(xCanvas+this.m_xOrig);
		var yDiff = yMouse-(yCanvas+this.m_yOrig);
		// first calculate x pos
		if (yDiff == 0)
		{
			// Mouse exactly level with the eye: push the pupil fully
			// left or right and recentre it vertically.
			if (xDiff > 0)
			{
				this.m_x = this.m_xMax;
			}
			else
			{
				this.m_x = this.m_xMin;
			}
			this.m_y = this.m_yOrig;
		}
		else
		{
			// The sign of yDiff is folded in below so m_x always grows
			// outward from m_xMin.
			var slope = xDiff/yDiff;
			if (yDiff > 0)
			{
				this.m_x = slope*(this.m_xMax-this.m_xMin) + this.m_xMin;
			}
			else
			{
				this.m_x = -slope*(this.m_xMax-this.m_xMin) + this.m_xMin;
			}
		}
		// then calculate y pos
		if (xDiff == 0)
		{
			// Mouse exactly above/below the eye: push the pupil fully
			// up or down and recentre it horizontally.
			if (yDiff > 0)
			{
				this.m_y = this.m_yMax;
			}
			else
			{
				this.m_y = this.m_yMin;
			}
			this.m_x = this.m_xOrig;
		}
		else
		{
			var slope = yDiff/xDiff;
			if (xDiff > 0)
			{
				this.m_y = slope*(this.m_yMax-this.m_yMin) + this.m_yMin;
			}
			else
			{
				this.m_y = -slope*(this.m_yMax-this.m_yMin) + this.m_yMin;
			}
		}
		// Finally clamp the pupil inside its bounding box (the slope can
		// exceed 1, pushing the raw position outside the limits).
		if (this.m_x > this.m_xMax)
		{
			this.m_x = this.m_xMax;
		}
		else if (this.m_x < this.m_xMin)
		{
			this.m_x = this.m_xMin;
		}
		if (this.m_y > this.m_yMax)
		{
			this.m_y = this.m_yMax;
		}
		else if (this.m_y < this.m_yMin)
		{
			this.m_y = this.m_yMin;
		}
	}

The full source code can be downloaded here.

Posted in CodeProject, web development | Tagged , , , | 13 Comments

Creating realistic particle effect with HTML5 canvas



This is my first experiment with the HTML5 canvas – the so-called replacement for Flash. I tried to create a realistic smoke particle effect to go in the background of my war zone scene. As you can see above it’s worked out pretty well (unless you are using an old version of IE – which doesn’t support the canvas control).

Tips

  • Copy from real life
  • To figure out how smoke behaves I first studied a video clip frame by frame. I found that the smoke is made up of individual puffs of smoke, which expand rapidly emerging from the fire and then slowly drift upwards expanding much slower and fading out. If you want realism I recommend observing the effect in real life, rather than creating it purely from your head, or copying some one else’s particle effect.

  • The further from camera, the slower
  • It’s important to take into account the distance the particle effect is from the camera. The further away, the slower the particles will move. For instance, the fireball of a huge spaceship exploding will almost appear to expand in slow motion. If you make the fireball expand too fast it will look like a tiny explosion to the viewer.

Technical details

I embed a normal image in my html page as the background and then overlaid the canvas control on top of it, as follows:


...






On loading the image the init() function is called which positions the canvas control to lie exactly on top of the background image.

To animate the canvas we use the same render loop as we do in games. This is what it looks like:

// Globals for the smoke demo: previous-frame timestamp, the 2d context
// (assigned in init) and the right-hand smoke emitter.
var lastRender = new Date().getTime();
var context;
var smokeRight = new ParticleEmitter();
// Main animation loop: advance the emitter by the elapsed time, draw it,
// then schedule the next frame.
function render()
{
	// time in milliseconds since the previous frame
	var elapsedMs = new Date().getTime() - lastRender;
	lastRender = new Date().getTime();
	smokeRight.update(elapsedMs);
	smokeRight.render(context);
	requestAnimFrame(render);
}

requestAnimFrame is an improved version of setTimeout. It calls our render function once every 16.66 ms ( i.e. 60 FPS). Every object typically has an update and render method. update is called with a timeElapsed parameter.  timeElapsed is the time in milliseconds since the last call. If you want everything to animate smoothly it is important that you make all movement calculations relative to this parameter.

There are two “classes” (really function objects, because javascript does not have classes, as such), ParticleEmitter and Particle.  ParticleEmitter creates an array of Particles and animates them:

// Owns and drives a set of smoke Particles that spawn around one point.
// The emitter's position is given as a fraction of the canvas size.
function ParticleEmitter()
{
	this.m_x;
	this.m_y;
	this.m_dieRate;
	this.m_image;
	this.m_speed = 0.02;
	this.m_alpha = 1.0;

	this.m_listParticle = [];

	// ParticleEmitter().init function
	// xScale = number between 0 and 1. 0 = left edge, 1 = right edge
	// yScale = number between 0 and 1. 0 = on top 1 = on bottom
	// particles = number of particles
	// image = smoke graphic for each particle
	this.init = function(xScale, yScale, particles, image)
	{
		this.m_x = CANVAS_WIDTH*xScale;
		this.m_y = CANVAS_HEIGHT*yScale;
		this.m_image = image;
		this.m_dieRate = 0.95;
		// start with smoke already in place
		for (var n = 0; n < particles; n++)
		{
			this.m_listParticle.push(new Particle());
			// Pre-age each particle so a full trail exists at startup.
			this.m_listParticle[n].init(this, n*50000*this.m_speed);
		}
	}

	// Advances every particle by timeElapsed milliseconds.
	this.update = function(timeElapsed)
	{
		for (var n = 0; n < this.m_listParticle.length; n++)
		{
			this.m_listParticle[n].update(timeElapsed);
		}
	}

	// Draws every particle onto the supplied 2d context.
	this.render = function(context)
	{
		for (var n = 0; n < this.m_listParticle.length; n++)
		{
			this.m_listParticle[n].render(context);
		}
	}
};

Each Particle initialises itself around the location of the emitter. The update method causes the particle to expand, fade in, then fade out and move upwards depending on its age. Each particle has a random direction, velocity and lifespan. When m_age > m_timeDie the particle is considered to be dead, at which point it is "reborn" at the location of the emitter again. This provides a continuous trail of smoke. (Well actually, I make it so that eventually the smoke stops according to the m_dieRate parameter).

The init function mentioned previously, loads the smoke puff graphic and the foreground character graphic and initialises the ParticleEmitters:

// Sets up the smoke demo: grabs the canvas, sizes and positions it exactly
// over the background image, then loads the smoke bitmap and starts the
// render loop once it has arrived.
function init()
{
	var canvas = document.getElementById('tutorial');
	if (canvas.getContext)
	{
		context = canvas.getContext('2d');
	}
	else
	{
		// No canvas support (old IE) — nothing we can do.
		return;
	}
	var imgBack = document.getElementById('background');
	// Make the canvas the same size as the background image.
	CANVAS_WIDTH = imgBack.width;
	CANVAS_HEIGHT = imgBack.height;
	canvas.width = imgBack.width;
	canvas.height = imgBack.height;
	// Get the background image's absolute page position by walking the
	// offsetParent chain.
	var xImage = imgBack.offsetLeft;
	var yImage = imgBack.offsetTop;
	var elem = imgBack.offsetParent;
	while (elem)
	{
		xImage += elem.offsetLeft;
		yImage += elem.offsetTop;
		elem = elem.offsetParent;
	}
	// Position the canvas directly on top of the background image.
	canvas.style.position = 'absolute';
	canvas.style.left = xImage + "px";
	canvas.style.top = (yImage) + "px";
	var imgSmoke = new Image();  
	// Attach onload BEFORE assigning src: some browsers fire the load
	// event synchronously for cached images, and a handler attached
	// afterwards would never run.
	imgSmoke.onload = function()
	{ 
		smokeRight.init(.9, .531, 20, imgSmoke);
		smokeLeft.m_alpha = 0.3;
		smokeLeft.init(.322, .453, 30, imgSmoke);
		requestAnimFrame(render);
	};  
	imgSmoke.src = 'puffBlack.png';
}

Only when the smoke image is finished loading is the first requestAnimFrame called, which kicks off the animation.

This is the graphic I used for the smoke particle:

You may notice there is also a wind effect controlled by the global windVelocity variable. This changes slightly every frame, simulating puffs of wind.

Optimisation

Finally, there is a drawing optimisation. The clearRect method clears all, or part of the canvas. We are being smart and we only clear the smallest rectangle that is needed. We keep track of this with the global variables: dirtyLeft, dirtyTop, dirtyRight, dirtyBottom. This optimisation may not actually be necessary in our case - depends on what platforms you are targetting.  The canvas control is quite fast on PCs (even older browsers like Firefox 3.6), however mobile browsers will struggle. This example works on the iPhone (albeit with slightly jittery animation)

Integration into WordPress

To get the particle effect to work in WordPress insert the following into the post:





Then you need to upload the script via ftp and specify the full url for all file references (i.e src="http://mydomain.com/wordpress/peffect.js"

References

I found the tutorial at Mozilla.org to be very useful source on the canvas control. Also, this article has some great optimisation tips.

Full listing

Finally, here is a full listing of the code used:

// Owns and drives a fixed set of smoke Particles spawned around one point.
// The emitter's position is specified as a fraction of the canvas size.
function ParticleEmitter()
{
	this.m_x;
	this.m_y;
	this.m_dieRate;
	this.m_image;
	this.m_speed = 0.02;
	this.m_alpha = 1.0;

	this.m_listParticle = [];

	// xScale = horizontal position, 0 (left edge) .. 1 (right edge)
	// yScale = vertical position, 0 (top) .. 1 (bottom)
	// particles = how many particles to create
	// image = smoke graphic shared by every particle
	this.init = function(xScale, yScale, particles, image)
	{
		// the effect is positioned relative to the width and height of the canvas
		this.m_x = CANVAS_WIDTH*xScale;
		this.m_y = CANVAS_HEIGHT*yScale;
		this.m_image = image;
		this.m_dieRate = 0.95;
		// Pre-age each particle so a full smoke trail exists immediately.
		var i = 0;
		while (i < particles)
		{
			var puff = new Particle();
			puff.init(this, i*50000*this.m_speed);
			this.m_listParticle.push(puff);
			i++;
		}
	}

	// Advances every particle by timeElapsed milliseconds.
	this.update = function(timeElapsed)
	{
		var total = this.m_listParticle.length;
		for (var i = 0; i < total; i++)
		{
			this.m_listParticle[i].update(timeElapsed);
		}
	}

	// Draws every particle onto the supplied 2d context.
	this.render = function(context)
	{
		var total = this.m_listParticle.length;
		for (var i = 0; i < total; i++)
		{
			this.m_listParticle[i].render(context);
		}
	}
};

// One puff of smoke. A particle spawns near its emitter, expands rapidly
// while fading in, then drifts upwards (pushed sideways by the wind)
// expanding slowly and fading out. Dead particles are normally re-spawned
// at the emitter — giving a continuous trail — until m_canRegen is cleared.
function Particle()
{
	this.m_x;
	this.m_y;
	this.m_age;       // milliseconds lived so far
	this.m_xVector;   // velocity, pixels per millisecond
	this.m_yVector;
	this.m_scale;     // current scale applied to the smoke bitmap
	this.m_alpha;     // current opacity
	this.m_canRegen;  // false once this puff is allowed to die for good
	this.m_timeDie;   // lifespan in milliseconds
	this.m_emitter;   // owning ParticleEmitter
  
	// age may be > 0: the emitter pre-ages particles so a trail exists
	// the moment the effect starts.
	this.init = function(emitter, age)
	{
		this.m_age = age;
		this.m_emitter = emitter;
		this.m_canRegen = true;
		this.startRand();
	}

	this.isAlive = function () 
	{
		return this.m_age < this.m_timeDie;
	}

	// Gives the particle a fresh random direction, speed, lifespan and
	// starting position around the emitter.
	this.startRand = function()
	{
		// smoke rises and spreads
		this.m_xVector = Math.random()*0.5 - 0.25;
		this.m_yVector = -1.5 - Math.random();
		this.m_timeDie = 20000 + Math.floor(Math.random()*12000);

		var invDist = 1.0/Math.sqrt(this.m_xVector*this.m_xVector 
			+ this.m_yVector*this.m_yVector);
		// normalise speed
		this.m_xVector = this.m_xVector*invDist*this.m_emitter.m_speed;
		this.m_yVector = this.m_yVector*invDist*this.m_emitter.m_speed;
		// starting position within a 20 pixel area 
		this.m_x = (this.m_emitter.m_x + Math.floor(Math.random()*20)-10);
		this.m_y = (this.m_emitter.m_y + Math.floor(Math.random()*20)-10);
		// the initial age may be > 0. This is so there is already a smoke trail in 
		// place at the start
		this.m_x += (this.m_xVector+windVelocity)*this.m_age;
		this.m_y += this.m_yVector*this.m_age;
		this.m_scale = 0.01;	
		this.m_alpha = 0.0;
	}

	// Ages the particle and moves/fades/scales it accordingly.
	this.update = function(timeElapsed)
	{
		this.m_age += timeElapsed;
		if (!this.isAlive()) 
		{
			// smoke eventually dies: each death rolls against the
			// emitter's m_dieRate to decide if this puff is gone forever
			if (Math.random() > this.m_emitter.m_dieRate)
			{
				this.m_canRegen = false;
			}
			if (!this.m_canRegen)
			{
				return;
			}
			// regenerate
			this.m_age = 0;
			this.startRand();
			return;
		}
		// At start the particle fades in and expands rapidly (like in real life)
		var fadeIn = this.m_timeDie * 0.05;	// first 5% of the lifespan
		var startScale;
		var maxStartScale = 0.3;
		if (this.m_age < fadeIn)
		{
			this.m_alpha = this.m_age/fadeIn;
			startScale = this.m_alpha*maxStartScale; 
			// y increases quicker because particle is expanding quicker
			this.m_y += this.m_yVector*2.0*timeElapsed;
		}
		else
		{
			// after fade-in: fade out linearly over the rest of the lifespan
			this.m_alpha = 1.0 - (this.m_age-fadeIn)/(this.m_timeDie-fadeIn);
			startScale = maxStartScale;
			this.m_y += this.m_yVector*timeElapsed;
		}
		// the x direction is influenced by wind velocity
		this.m_x += (this.m_xVector+windVelocity)*timeElapsed;
		this.m_alpha *= this.m_emitter.m_alpha;
		this.m_scale = 0.001 + startScale + this.m_age/4000.0;
	}

	// Draws the puff and grows the global dirty rectangle to cover it.
	this.render = function(ctx)
	{
		if (!this.isAlive()) return;
		ctx.globalAlpha = this.m_alpha;
		var height = this.m_emitter.m_image.height*this.m_scale;
		var width = this.m_emitter.m_image.width*this.m_scale;
		// round it to a integer to prevent subpixel positioning
		var x = Math.round(this.m_x-width/2);
		// NOTE(review): x is centred with -width/2 but y uses +height/2 —
		// presumably an intentional anchor choice; confirm before changing.
		var y = Math.round(this.m_y+height/2);
		ctx.drawImage(this.m_emitter.m_image, x, y, width, height);  
		if (x < dirtyLeft)
		{
			dirtyLeft = x;
		}
		if (x+width > dirtyRight)
		{
			dirtyRight = x+width;
		}
		if (y < dirtyTop)
		{
			dirtyTop = y;
		}
		if (y+height > dirtyBottom)
		{
			dirtyBottom = y+height;
		}
	}
};

// --- Globals for the smoke demo ---
var lastRender = new Date().getTime();	// timestamp of the previous frame
var context;				// 2d canvas context, assigned in init()
var smokeRight = new ParticleEmitter();
var smokeLeft = new ParticleEmitter();
// Canvas size; overwritten in init() with the background image's size.
var CANVAS_WIDTH = 960;
var CANVAS_HEIGHT = 640;
// only redraw minimum rectangle (bounds of everything drawn last frame)
var dirtyLeft = 0;
var dirtyTop = 0;
var dirtyRight = CANVAS_WIDTH;
var dirtyBottom = CANVAS_HEIGHT;
var windVelocity = 0.01;	// horizontal wind push, varied each frame in render()
var count = 0;


// Sets up the smoke demo: grabs the canvas, sizes and positions it exactly
// over the background image, then loads the smoke bitmap and starts the
// render loop once it has arrived.
function init()
{
	var canvas = document.getElementById('tutorial');
	if (canvas.getContext)
	{
		context = canvas.getContext('2d');
	}
	else
	{
		// No canvas support (old IE) — nothing we can do.
		return;
	}
	var imgBack = document.getElementById('background');
	// make canvas same size as background image
	CANVAS_WIDTH = imgBack.width;
	CANVAS_HEIGHT = imgBack.height;
	canvas.width = imgBack.width;
	canvas.height = imgBack.height;
	// get absolute position of background image
	var xImage = imgBack.offsetLeft;
	var yImage = imgBack.offsetTop;
	var elem = imgBack.offsetParent;
	while (elem)
	{
		xImage += elem.offsetLeft;
		yImage += elem.offsetTop;
		elem = elem.offsetParent;
	}
	// position canvas on top of background
	canvas.style.position = 'absolute';
	canvas.style.left = xImage + "px";
	canvas.style.top = yImage + "px";
	var imgSmoke = new Image();  
	// Attach onload BEFORE assigning src: some browsers fire the load
	// event synchronously for cached images, and a handler attached
	// afterwards would never run.
	imgSmoke.onload = function()
	{ 
		smokeRight.init(.9, .531, 20, imgSmoke);
		smokeLeft.m_alpha = 0.3;
		smokeLeft.init(.322, .453, 30, imgSmoke);
		requestAnimFrame(render);
	};  
	imgSmoke.src = 'puffBlack.png';
}

// shim layer with setTimeout fallback
// Uses the browser's native requestAnimationFrame where available (trying
// the vendor-prefixed variants) and otherwise falls back to setTimeout at
// ~60 FPS (17 ms per frame).
    window.requestAnimFrame = (function(){
      return  window.requestAnimationFrame       || 
              window.webkitRequestAnimationFrame || 
              window.mozRequestAnimationFrame    || 
              window.oRequestAnimationFrame      || 
              window.msRequestAnimationFrame     || 
              function( callback ){
                window.setTimeout(callback, 17);
              };
    })();

  
// Main render loop for the smoke effect: clears only the dirty rectangle
// from the previous frame, advances both emitters, and random-walks the
// wind before scheduling the next frame.
function render() 
{  
	// time in milliseconds since the previous frame
	var timeElapsed = new Date().getTime() - lastRender;
	lastRender = new Date().getTime();
	// Clear only the region the particles touched last frame.
	context.clearRect(dirtyLeft, dirtyTop, dirtyRight-dirtyLeft, dirtyBottom-dirtyTop);
	// Reset the dirty rect to "empty": left/top start at the far edge and
	// right/bottom at 0 so the first particle drawn re-seeds it. Using the
	// canvas dimensions instead of the old hard-coded 1000 keeps this
	// correct for canvases larger than 1000 pixels.
	dirtyLeft = CANVAS_WIDTH;
	dirtyTop = CANVAS_HEIGHT;
	dirtyRight = 0;
	dirtyBottom = 0;
	smokeRight.update(timeElapsed);
	smokeRight.render(context);
	smokeLeft.update(timeElapsed);
	smokeLeft.render(context);
	// Random-walk the wind, clamped to [0, 0.015] so smoke drifts gently
	// and always in the same direction.
	windVelocity += (Math.random()-0.5)*0.002;
	if (windVelocity > 0.015)
	{
		windVelocity = 0.015;
	}
	if (windVelocity < 0.0)
	{
		windVelocity = 0.0;
	}
	requestAnimFrame(render);  
}  
Posted in web development | Tagged , , , | 5 Comments

XML: Killing the Bloat

Many people criticise XML for being unnecessarily verbose, which is true. What they may not realise is that the bloat can be easily be slimmed down by storing the data in attributes instead of elements.

Example of bloated XML:

The same info can be stored in a much more compact fashion:

slim XML example
The slimmed down version has many advantages:

  • Easier to read
  • Faster to load
  • Uses up less disk space and bandwidth

Of course, you could achieve the same results by just using JSON, but then you lose out on the XML buzzword 😉

Posted in CodeProject, XML | Comments Off on XML: Killing the Bloat

Tiling system used in Armageddon Wars

Traditional Grid system

The traditional tiled game uses tiles that are opaque and the same size and shape – usually square or hexagonal – and located side by side in a “grid”.

This system has many disadvantages.  The artist has to create a lot more tiles to provide variety in the landscape. Also, extra tiles need to be created to provide the borders between one type of terrain and another – for essentially what is the same graphic. The end result will still look like it’s created out of a grid of tiles.

The “Big Pile of Tiles” system

So, how do we create a better tiling system? The answer is quite obvious – just remove all the restrictions. Get rid of the grid idea completely, so you just have a “big pile of tiles”. Tiles are alpha transparent and can be layered on top of each other many times. Each tile can be located anywhere in the scene and it can have any orientation, scaling, or transparency. Nowadays most devices have a decent graphics processor and can handle this easily.

Step-by-step example

This is an example of  a landscape created using this system:
example of landscape
The above scene was created using these tiles:

First we started with the opaque square base tile and lay this tile down side-by-side. This provides a fine level of detail underneath the other tiles.

Then we add the mottled tile to add larger levels of detail. The same tile is added several times with different scaling and rotation.

Next we add the black and white tiles to create dark and light areas:

Then, we add the road tiles:


Finally, some gray tiles at the top and cracks:

As you can see, with just a small selection of tiles we can create a wide variety of landscape and the result does not look like it’s built out of tiles.
The tile editor creates these tiles in a text file. This is what the above scene tiles looks like:

tile name=plainTile type=base
tile name=stainWhite1 x=-32 y=424 zorder=1 alpha=30 scale=5.3
tile name=stainWhite1 x=107 y=346 zorder=1 alpha=45 scale=5.3 rotation=330
tile name=stainBlack4 x=134 y=259 zorder=1 alpha=65
tile name=stainBlack4 x=-40 y=217 zorder=1 alpha=85 scale=1.8
tile name=plaza3 x=207 y=211 zorder=1 alpha=85 scale=1.55 rotation=68
tile name=plaza3 x=37 y=198 zorder=1 scale=1.7 rotation=346
tile name=stainRect x=-63 y=369 zorder=2 alpha=50 scale=1.05 rotation=264
tile name=stainRect x=582 y=237 zorder=2 alpha=60 scale=2 rotation=280
tile name=stainRect x=966 y=17 zorder=2 alpha=70 scale=1.45 rotation=359
tile name=stainRegular3 x=306 y=529 zorder=2 alpha=80 scale=1.9 rotation=267
tile name=roadBroken1 x=55 y=373 zorder=4 alpha=50 rotation=263
tile name=roadNarrow3 x=361 y=401 zorder=4 alpha=55 rotation=350
tile name=roadNarrow3 x=327 y=722 zorder=4 alpha=60 scale=1.15 rotation=248

This is what the final scene looks like with buildings added in:

Posted in Uncategorized | Tagged , , | Comments Off on Tiling system used in Armageddon Wars

Removing randomness for testing

Normally, games have some sort of randomness. This makes it difficult to test because each time you play the game a different set of events occur.

I had this idea to have a switchable random number class. Normally, the class returns a random number, but when the test flag is set, it returns numbers in a predetermined sequence.
Here’s the code:

// Switchable random number source for reproducible test runs.
// Normally wraps rand(); once LoadSeq() has filled m_listSeq, the recorded
// numbers are replayed in order (wrapping at the end) so every test run
// sees the same "random" events.
class RandNumber
{
public:
	RandNumber();
	~RandNumber();
	// Loads the recorded sequence from randSeq.txt (created by GenSeq).
	void LoadSeq();
	// Writes 10000 rand() values to randSeq.txt for later replay.
	void GenSeq();
	// Returns an int scaled into [0, max] from the next raw number.
	int GetInt(int max)
	{
		int randNum;
		if (m_listSeq.empty())
		{
			randNum = rand();
		}
		else
		{
			randNum = m_listSeq[m_currSeqIX];
			m_currSeqIX++;
			// Wrap around like GetFloat() does; without this the index
			// ran past the end of the recorded sequence.
			if (m_currSeqIX == (int)m_listSeq.size())
				m_currSeqIX = 0;
		}
		randNum = int((float)randNum*max*m_invRandMax);
		return randNum;
	}
	// Returns a float scaled into [0, max] from the next raw number.
	float GetFloat(float max)
	{
		int randNum;
		if (m_listSeq.empty())
		{
			randNum = rand();
		}
		else
		{
			randNum = m_listSeq[m_currSeqIX];
			m_currSeqIX++;
			if (m_currSeqIX == (int)m_listSeq.size())
				m_currSeqIX = 0;
		}
		return (float)randNum*max*m_invRandMax;
	}
private:
	std::vector<int> m_listSeq;	// replay sequence; empty => use rand()
	int m_currSeqIX;		// index of the next number to replay
	float m_invRandMax;		// 1.0f / RAND_MAX, set in the constructor
};

You need to call GenSeq() initially to create the file with the sequence of numbers.

#include "RandNumber.h"

// Precompute 1/RAND_MAX for scaling and start the replay cursor at the
// beginning of any recorded sequence.
RandNumber::RandNumber()
{
	// Previously left uninitialized until LoadSeq() ran.
	m_currSeqIX = 0;
	m_invRandMax = 1.0f/(float)RAND_MAX;
}

RandNumber::~RandNumber()
{
}

const char* szSeqFilename = "randSeq.txt";

// Loads the comma-separated list of ints recorded by GenSeq() into
// m_listSeq. Throws RacException if the file is missing or unreadable.
void RandNumber::LoadSeq()
{
	FILE* pFile = fopen(szSeqFilename, "rb");
	if (pFile == NULL)
		throw RacException("cannot open ", szSeqFilename);
	m_currSeqIX = 0;

	fseek(pFile, 0, SEEK_END);
	int fileSize = (int)ftell(pFile);
	if (fileSize == 0)
	{
		fclose(pFile);	// was leaked on the empty-file path
		return;
	}
	fseek(pFile, 0, SEEK_SET);
	// +1 so the buffer can be NUL-terminated: strtok requires a C string,
	// but fread hands us raw bytes.
	char* pBuffer = new char[fileSize+1];
	// fread returns the number of fileSize-byte items read, i.e. 1 on success.
	int bytesRead = (int)fread(pBuffer, fileSize, 1, pFile);
	fclose(pFile);
	if (bytesRead != 1)
	{
		delete [] pBuffer;	// was leaked when the read failed
		throw RacException("cannot read ", szSeqFilename);
	}
	pBuffer[fileSize] = '\0';

	char* szToken = strtok(pBuffer, ",");
	while (szToken != NULL)
	{
		m_listSeq.push_back(atoi(szToken));
		szToken = strtok(NULL, ",");
	}
	delete [] pBuffer;
}

// Writes 10000 rand() values as a comma-separated list to randSeq.txt in
// the resource directory, for later replay via LoadSeq().
// On failure to open the file this asserts in debug and silently returns
// in release builds.
void RandNumber::GenSeq()
{
	string file = g_resManager.GetDirResources()+szSeqFilename;
	FILE* pFile = fopen(file.c_str(), "wt");
	if (pFile == NULL)
	{
		assert(false);
		return;
	}
	// Build the whole list in memory first, then write it in one go.
	ostringstream os;
	for (int n = 0; n < 10000; n++)
	{
		os << rand() << ",";
	}
	fwrite(os.str().c_str(), os.str().length(), 1, pFile);
	fclose(pFile);
}
Posted in Testing | Leave a comment

Optimizing meshes for the iPhone

The PowerVR guide says if you order triangle indices as if they were triangle strips you will get a speed boost, because the PowerVR chip implementation uses triangle strips internally. The PowerVR SDK has an example that shows this, using a model of a sphere. I assumed that the example was an extreme case and you wouldn’t see such a big improvement for real models. However, I was pleasantly surprised to see that it actually did give a big improvement in a real world example – cutting down render time from 38ms to 35.5ms in a scene with 18 skinned meshes.

I used tootle to re-order the indices:

// Re-order the index buffer in place so triangles follow strip order —
// TOOTLE_VCACHE_LSTRIPS matches the PowerVR chip's internal strip layout
// (the default ordering targets PC GPU vertex caches instead).
int result = TootleOptimizeVCache(pIndices,
      numTriIndices/3, m_listVertexArray[0]->GetNumVertices(),
      TOOTLE_DEFAULT_VCACHE_SIZE, pIndices, NULL, TOOTLE_VCACHE_LSTRIPS);
if (result != TOOTLE_OK)
    cout << "could not optimise!" << endl;

It’s important to use TOOTLE_VCACHE_LSTRIPS, because the default ordering is designed for PC GPUs and won’t work well on the iPhone.
Also, you have to reorder the vertex data to match the order in the triangle index array. Tootle can be found here.
Unfortunately, Tootle crashes for certain meshes. If there was source code, I probably could have fixed that – but there isn’t :(.

Posted in Graphic engine design, graphics, iPhone development, OpenGL, Uncategorized | Comments Off on Optimizing meshes for the iPhone

Vertex data interleaving on iPhone

In the PowerVR manual it says that you should interleave vertex data:

I tried this out on my engine and actually found no measurable improvement for static meshes. For skinned meshes it actually goes slower! Why? Because if you are doing software skinning, you have to reset the vertex buffer every frame. If the vertex data isn’t interleaved then you can do this using memset for the position and normal data, however if it’s interleaved you can’t because you would overwrite the texture co-ordinates, so you have to use a loop instead.

Posted in Uncategorized | Comments Off on Vertex data interleaving on iPhone

Accelerating Software skinning with VFP assembler

I was trying to get my engine perform better on older iDevices. I need to be able to render 18 characters on screen simultaneously, however on the 1st gen iPod touch it takes 63ms to render the scene. I thought I’d try to use vfp assembly to speed it up, using code from this site: http://code.google.com/p/vfpmathlibrary/

Initially, it didn’t make any difference at all. This was because it was GPU bound. So, I reduced the scene to 8 skinned meshes – which would show up optimisation improvements better.

The assembler code still didn’t speed things up that much. I ran the code analyzer tool and found that the piece of code taking most of the time was the code that transforms the vertices with the current matrix of the joint:

// Skinning inner loop: m_pInfluences is assumed to be a packed array of
// (int vertexIndex, float weight) pairs, walked with one int-sized pointer.
// TODO confirm the packing against the struct definition (not visible here).
int* pIndexData = (int*)m_pInfluences;
for (n = 0; n < m_numInfluences; n++)
{
// vertex this influence applies to
index = *pIndexData;
pIndexData++;
// reinterpret the next 32 bits as the blend weight
weight = *((float*)pIndexData);
pIndexData++;
matrix.TransformPoint(&pOrigData[index], weight, &pCurrData[index]);
}

// Accumulates weight * (M * v) into pOutVertex, where M is this matrix in
// OpenGL-style column-major layout (translation in m[12..14]) and
// v = (pInVertex, w = 1). Called once per joint influence during skinning.
// NOTE(review): the published listing lost its closing brace and
// indentation; both restored here, the arithmetic is unchanged.
void Matrix::TransformPoint(const float* pInVertex, float weight,
	float* pOutVertex) const
{
	pOutVertex[0] += weight*(pInVertex[0]*m[0] + pInVertex[1]*m[4] +
		pInVertex[2]*m[8] + m[12]);
	pOutVertex[1] += weight*(pInVertex[0]*m[1] + pInVertex[1]*m[5] +
		pInVertex[2]*m[9] + m[13]);
	pOutVertex[2] += weight*(pInVertex[0]*m[2] + pInVertex[1]*m[6] +
		pInVertex[2]*m[10] + m[14]);
}
There was a function similar to this in the vfpmathlibrary. So I modified it and this is the result:


// Sets the VFP vector length and stride fields of FPSCR to 0 (scalar mode).
// NOTE(review): the blog engine mangled this listing — smart quotes, the
// lost backslash line-continuations and the "\n\t" escapes (which appeared
// as "nt") are restored here.
#define VFP_VECTOR_LENGTH_ZERO "fmrx r0, fpscr \n\t" \
	"bic r0, r0, #0x00370000 \n\t" \
	"fmxr fpscr, r0 \n\t"

// Set vector length. VEC_LENGTH has to be between 0 for length 1 and 3 for length 4.
#define VFP_VECTOR_LENGTH(VEC_LENGTH) "fmrx r0, fpscr \n\t" \
	"bic r0, r0, #0x00370000 \n\t" \
	"orr r0, r0, #0x000" #VEC_LENGTH "0000 \n\t" \
	"fmxr fpscr, r0 \n\t"

inline void Matrix::TransformPoint(const float* pInVertex, float weight,
float* pOutVertex) const
{
// Load the whole matrix.
“fldmias %[matrix], {s8-s23} nt”
// Load vector to scalar bank.
“fldmias %[pInVertex], {s0-s2} nt”
// Load vector to scalar bank.
“fldmias %[weight], {s3} nt”
// Load vector to scalar bank.
“fldmias %[pOutVertex], {s28-s30} nt”

// VFP_VECTOR_LENGTH(2)

// First column times matrix.
“fmuls s24, s8, s0 nt”
“fmacs s24, s12, s1 nt”
“fmacs s24, s16, s2 nt”
“fadds s24, s24, s20 nt”
“fmuls s24, s24, s3 nt”
“fadds s24, s24, s28 nt”

// Save vector.
“fstmias %[pOutVertex], {s24-s26} nt”

// VFP_VECTOR_LENGTH_ZERO
:
: [matrix] “r” (m),
[pInVertex] “r” (pInVertex),
[weight] “r” (&weight),
[pOutVertex] “r” (pOutVertex)
: “cc”,
“s0”, “s1”, “s2”, “s3”,
“s8”, “s9”, “s10”, “s11”, “s12”, “s13”, “s14”, “s15”,
“s16”, “s17”, “s18”, “s19”, “s20”, “s21”, “s22”, “s23”,
“s24”, “s25”, “s26”, “s28”, “s29”, “s30”
);
}

It took me quite a while to figure out the assembler, because you need to reference several very technical books to figure it out. I’d like to make this job easier for any interested programmers out there. So, just let me explain it line by line.

On the first line you have: asm volatile(…); . This instructs gcc that the stuff in the ( ) brackets is assembler code. volatile tells gcc not to try to “optimize” the code away or reorder it.

Then you have a number of strings each string is an arm vfp instruction.

The vfp has 4 banks of 8 single precision floating point registers:

The idea is that you can do up to 8 similar floating point operations at the same time.  If you look at the formula that we’re trying to implement again:

pOutVertex[0] += weight*(pInVertex[0]*m[0] + pInVertex[1]*m[4] + pInVertex[2]*m[8] + m[12]);
pOutVertex[1] += weight*(pInVertex[0]*m[1] + pInVertex[1]*m[5] + pInVertex[2]*m[9] + m[13]);
pOutVertex[2] += weight*(pInVertex[0]*m[2] + pInVertex[1]*m[6] + pInVertex[2]*m[10] + m[14]);

You see that we could do pInVertex[0]*m[0], pInVertex[0]*m[1] and pInVertex[0]*m[2] all in one instruction. And the rest of the formula is done the same way – three operations all in the one go.

So, let’s go through the code line by line.

First you have: "fldmias  %[matrix], {s8-s23}     nt"

fldmias loads memory contents into several registers. Here, it’s loading the entire matrix (16 floats) into s8-s23. (It doesn’t actually use all the data in the matrix, but it’s easier to do it all in one instruction).

The “matrix” is an assembler variable defined in the section at the bottom, but we’ll cover that later.

Notice, there is \n\t at the end of the line. That’s just to format the assembler code — it’s something that you have to add to each assembler line.

Next, we have: "fldmias  %[pInVertex], {s0-s2}      nt"

This loads the 3 vertex co-ords into s0-s2 – i.e. bank 0. Bank zero is different than the other banks, but I’ll go into that later.

Then, we load the weight and the output vertex co-ords into other registers:

"fldmias  %[weight], {s3}      nt"
"fldmias  %[pOutVertex], {s28-s30}      nt"

So, now we have everything loaded.

Next we have to tell the vfp how many ops we do at the same time. We have a macro:
VFP_VECTOR_LENGTH(2)

This sets the vector length setting to 3 (it’s actually one more than the specified parameter).

So, now it’s time to do the fun part: the math ops!

The first op is: "fmuls s24, s8, s0        nt"

This is equivalent to three single vector ops:

fmuls s24, s8, s0
fmuls s25, s9, s0
fmuls s26, s10, s0

s0 is in bank 0 and this bank has special function: the address never increments for a vector operation ( a so-called scalar vector). Now, if you remember we had the matrix data in s8-s23 and the vertex data in s0-s3. So this function does the following calculation:

s24 = pInValues[0]*m[0]
s25 = pInValues[0]*m[1]
s26 = pInValues[0]*m[2]

We are always dumping the results into s24-s26, which we use as temp registers.

The next instruction is:

"fmacs s24, s12, s1       nt"

fmacs multiplies, then adds. So this instruction is the equivalent to:

s24 += pInValues[1]*m[4]
s25 += pInValues[1]*m[5]
s26 += pInValues[1]*m[6]

Then

"fmacs s24, s16, s2       nt"

As you probably guessed, this is the equivalent to:

s24 += pInValues[2]*m[8]
s25 += pInValues[2]*m[9]
s26 += pInValues[2]*m[10]

Then:

"fadds s24, s24, s20        nt"

As you might guess this is addition:

s24 += m[12]
s25 += m[13]
s26 += m[14]

Then multiply by the weight which is stored in s3:

"fmuls s24, s24, s3        nt"

s24 *= weight
s25 *= weight
s26 *= weight

Finally, add to the current vertex data (which we stored in s28-s30):

"fadds s24, s24, s28        nt"

s24 += pOutValues[0]
s25 += pOutValues[1]
s26 += pOutValues[2]

Then, we store the result back into the current vertex data:
"fstmias  %[pOutVertex], {s24-s26}  \n\t"
And the VFP_VECTOR_LENGTH_ZERO macro restores the vector size back to the default value of 1 (otherwise all hell would break loose).

The stuff at the end tells gcc the inputs and outputs of the function. There always have to be three sections separated by colons :
: // output parameters
: [matrix] “r” (m),
[pInVertex] “r” (pInVertex),
[weight] “r” (&weight),
[pOutVertex] “r” (pOutVertex)            // input parameters
: “r0”, “cc”, “s0”,  “s1”,  “s2”,  “s3”,
“s8”,  “s9”,  “s10”, “s11”, “s12”, “s13”, “s14”, “s15”,
“s16”, “s17”, “s18”, “s19”, “s20”, “s21”, “s22”, “s23”,
“s24”, “s25”, “s26”, “s28”, “s29”, “s30” // clobber list

The first section is the output parameters, which is blank. This doesn’t make any sense, because really it should have pOutVertex, but apparently it just works that way – don’t ask me why.

The next section is the input parameters. First you have the variable name used in the assembler code surrounded by square brackets [], then you have a “r” then the variable name as used in the c++ part of the code in round brackets (). Note: this has to be an address, *not* a value, that’s why the weight has a & in front of it.

The next section is what is affectionately known as “the clobber list“. This tells gcc what registers we have used in the program. If you accidentally forget to include a register in the clobber list, it’ll crash, so this is important.

I found that the program could be speeded up even more by moving the VFP_VECTOR_LENGTH macros from TransformPoint to outside of the main loop:


// Same influence loop as before, but the FPSCR vector-length changes are
// hoisted out of TransformPoint: set length 3 once, run every influence,
// then restore scalar mode. This avoids two fmrx/fmxr round-trips per vertex.
SetVectorLen2();
int* pIndexData = (int*)m_pInfluences;
for (n = 0; n < m_numInfluences; n++)
{
index = *pIndexData;
pIndexData++;
// (int, float) pairs packed back-to-back — see note on the earlier listing.
weight = *((float*)pIndexData);
pIndexData++;
matrix.TransformPoint(&pOrigData[index], weight, &pCurrData[index]);
}
// Back to scalar mode — leaving vector mode on would corrupt later float ops.
SetVectorLen0();

All in all, the assembler code reduces the total render time from 34ms to 30.5ms (when rendering 8 skinned meshes), which is not bad.

If you try to run this code on a newer device, like an iPhone 3GS, you’re in store for a surprise as they don’t seem to support the VFP instructions and it actually reduces the performance by a large amount :-D.

But don’t worry about this because the 3GS goes so fast it doesn’t really need assembler.

 

Posted in iPhone development, skinned meshes, Uncategorized | Tagged , , , , , | Comments Off on Accelerating Software skinning with VFP assembler

Slowville: std::map

Many people don’t realise how slow the map that comes with the standard template library is.

Recently, I did some performance comparisons of std::map versus boost::unordered_map.

These are the benchmarks that I tested:

#define pmap map

void PerfTests::MapTest()
{
pmap mapTest;
int time1 = GetMilliSeconds();
int n;
int randNum, rand2;
rand2 = 0;
char buffer[100];
pair ::iterator,bool> ret;
int total = 0;
for (n = 0; n < 10000; n++) { randNum = rand()%1000; sprintf(buffer, "%d", randNum); ret = mapTest.insert(std::make_pair (buffer, randNum) ); if (ret.second == false) total++; } pmap::iterator iter1;
for (n = 0; n < 10000; n++) { randNum = rand()%1000; sprintf(buffer, "%d", randNum); iter1 = mapTest.find(buffer); if (iter1 != mapTest.end()) { total += iter1->second;
}
}
int time2 = GetMilliSeconds();
int timeElapsed = abs(time2-time1);
cout << "map test1 time:" << timeElapsed << "ms" << endl; time1 = time2; pmap mapTest2;
pmap::iterator iterLowest;
pmap::iterator iter2;
int f;
for (n = 0; n < 10000; n++) { for (f = 0; f < 10; f++) { randNum = rand()%1000; iter2 = mapTest2.find(randNum); if (iter2 != mapTest2.end()) { rand2 = iter2->second+1;
}
else
{
mapTest2[randNum] = rand2;
rand2 = n;
}
}
// find lowest
iterLowest = mapTest2.begin();
for (iter2 = mapTest2.begin(); iter2 != mapTest2.end(); iter2++)
{
if (iter2->second > iterLowest->second)
iterLowest = iter2;
}
mapTest2.erase(iterLowest);
}
time2 = GetMilliSeconds();
timeElapsed = abs(time2-time1);
cout << "map test2 time:" << timeElapsed << "ms" << endl; }

(The second test is similar to the code in my pathfinding algorithm, so it's more of a real world test.)

And these are the results running on 1st gen iPod touch:

boost::unordered_map
map test1 time:115ms
map test2 time:2251ms

std::map
map test1 time:200ms
map test2 time:3940ms

As you can see it's nearly twice as slow.

std::map is an ordered map. In other words, when iterating the values are ordered according to the key. Normally, you don't need this functionality, so using a hash map like boost::unordered_map is a no-brainer.

Posted in Uncategorized | Tagged , , , , , , , | Comments Off on Slowville: std::map