#include <avr/wdt.h>
#include <avr/io.h>
#include <avr/pgmspace.h>
#include <avr/eeprom.h>
#include <avr/interrupt.h>
#include <util/delay.h>
#include <math.h>
#include <stdarg.h>
#include <stdio.h>

typedef unsigned long UInt32;
typedef unsigned short UInt16;
typedef unsigned char UInt8;
typedef signed char Int8;

//ROWS (active high)
//0  1  2  3  4  5  6  7
//D0 D2 D5 D7 C4 C2 C0 B2

//COLS (active low)
//0  1  2  3  4  5  6  7
//D1 D3 D6 B0 C3 C1 B5 B1

//Per-port bit masks for each row / column line, kept in flash and indexed by
//row or column number. An entry is 0 when that line is not on the given port.
//Written as (1 << pin) so each entry can be checked against the pin maps above.
static const PROGMEM UInt8 gRowsB[] = {
	0, 0, 0, 0, 0, 0, 0, 1 << 2,			//row 7 = B2
};
static const PROGMEM UInt8 gRowsC[] = {
	0, 0, 0, 0, 1 << 4, 1 << 2, 1 << 0, 0,		//rows 4,5,6 = C4,C2,C0
};
static const PROGMEM UInt8 gRowsD[] = {
	1 << 0, 1 << 2, 1 << 5, 1 << 7, 0, 0, 0, 0,	//rows 0,1,2,3 = D0,D2,D5,D7
};
static const PROGMEM UInt8 gColsB[] = {
	0, 0, 0, 1 << 0, 0, 0, 1 << 5, 1 << 1,		//cols 3,6,7 = B0,B5,B1
};
static const PROGMEM UInt8 gColsC[] = {
	0, 0, 0, 0, 1 << 3, 1 << 1, 0, 0,		//cols 4,5 = C3,C1
};
static const PROGMEM UInt8 gColsD[] = {
	1 << 1, 1 << 3, 1 << 6, 0, 0, 0, 0, 0,		//cols 0,1,2 = D1,D3,D6
};

//Port values with every column bit set (columns are active low, so this is
//"all columns disabled") and no row bit set.
#define ALL_COLS_B	((1 << 0) | (1 << 1) | (1 << 5))	//0x23
#define ALL_COLS_C	((1 << 1) | (1 << 3))			//0x0A
#define ALL_COLS_D	((1 << 1) | (1 << 3) | (1 << 6))	//0x4A


//State shared between the main code and the refresh interrupt.
static volatile UInt8 gBufPos;			//byte offset (into the front buffer) of the next element the refresh ISR will output
static volatile UInt8 * volatile gBufAddr;	//front buffer shown by the refresh ISR. The pointer ITSELF is volatile: it is written by swapBufs() and read in the ISR, so the compiler must not cache, sink or drop accesses to it
static UInt8 gBack = 0;	//which buffer is the backbuffer (0 = gBuffer0, non-zero = gBuffer1)

/*

The screen is double buffered and interrupt driven. As long as you do
not disable interrupts for a long time, it will just automagically work.
Use clearScreen() to clear the backbuffer and setPix() to set a pixel
in it to a particular brightness, then swapBufs() to put it on screen.

Why 4 bytes per row and not 3?
While it is true that 3 is enough (the values of the 3 ports), 4 makes it
easier to address without using multiplication. Sidenote: on AVR this
may not be as clever as it seems, since shifts of non-8-bit data are
just as slow.
*/

//Display geometry.
//BPP: number of brightness planes (bits per pixel). Changing it is not
//guaranteed to work, and it will definitely NOT work unless it stays a
//power of two.
#define BPP		4
//BYTES_PER_ELEM: bytes stored per row of one plane (three port values + one pad byte)
#define BYTES_PER_ELEM	4
#define HEIGHT		8
#define WIDTH		8

//The two frame buffers. Layout of each: BPP planes, each plane HEIGHT rows,
//each row BYTES_PER_ELEM bytes.
UInt8 gBuffer0[BPP * HEIGHT * BYTES_PER_ELEM];
UInt8 gBuffer1[BPP * HEIGHT * BYTES_PER_ELEM];


/*
	Set one pixel of the backbuffer to a brightness.

	r   - row, 0 .. HEIGHT - 1
	c   - column, 0 .. WIDTH - 1
	bri - brightness, 0 .. 15. Bit t of bri decides whether the pixel is lit
	      in brightness plane t; only the low BPP bits are looked at.

	Coordinates outside the display are ignored. Without this check they
	would index past the column tables and write outside the frame buffer.
*/
void setPix(UInt8 r, UInt8 c, UInt8 bri){
	
	UInt8* buf;
	UInt8 t, maskB, maskC, maskD;
	
	if(r >= HEIGHT || c >= WIDTH) return;
	
	//this row's element in plane 0 of the backbuffer
	buf = (gBack ? gBuffer1 : gBuffer0) + r * BYTES_PER_ELEM;
	
	//the column's bit in each port does not change per plane: read flash once
	maskB = pgm_read_byte(gColsB + c);
	maskC = pgm_read_byte(gColsC + c);
	maskD = pgm_read_byte(gColsD + c);
	
	//walk the planes: same row, one plane (HEIGHT * BYTES_PER_ELEM bytes) further each step
	for(t = 0; t < BPP; t++, bri >>= 1, buf += HEIGHT * BYTES_PER_ELEM){
		
		if(bri & 1){	//columns are active low: clear the bit to light the pixel
			buf[0] &= (UInt8)~maskB;
			buf[1] &= (UInt8)~maskC;
			buf[2] &= (UInt8)~maskD;
		}
		else{		//set the bit to keep the pixel dark in this plane
			buf[0] |= maskB;
			buf[1] |= maskC;
			buf[2] |= maskD;
		}
	}
}

void clearScreen(void){

	UInt8* ptr = gBack ? gBuffer1 : gBuffer0;
	UInt8 t, r;
	
	for(t = 0; t < BPP; t++) for(r = 0; r < HEIGHT; r++){ 		//clear all brightness planes
		
		*ptr++ = pgm_read_byte(gRowsB + r) | ALL_COLS_B;
		*ptr++ = pgm_read_byte(gRowsC + r) | ALL_COLS_C;
		*ptr   = pgm_read_byte(gRowsD + r) | ALL_COLS_D;
		ptr += 2;
	}
}

/*
	Show the buffer that was just drawn: the current backbuffer becomes the
	buffer the refresh interrupt reads, and the other buffer becomes the new
	backbuffer. The new backbuffer is NOT cleared.

	noinline is a fix for a gcc bug, wherein it will inline this func, and
	then remove it.
*/
void __attribute__((noinline)) swapBufs(void){
	
	UInt8* const drawn = gBack ? gBuffer1 : gBuffer0;
	UInt8 savedSreg;
	
	gBack ^= 1;
	
	//publish the pointer with interrupts off, then restore the previous
	//interrupt state (rather than unconditionally enabling interrupts)
	savedSreg = SREG;
	cli();
	gBufAddr = drawn;
	SREG = savedSreg;
}

static void init(){
	
	//wdt
	{		
		cli();
		wdt_reset();
		wdt_disable();
	}
	
	//ports
	{
		MCUCR |= 0x40;
		ACSR = 0x80;	//disable comparator
		DIDR1 = 0;
		DIDR0 = 0;
		PORTB = ALL_COLS_B;
		PORTC = ALL_COLS_C;
		PORTD = ALL_COLS_D;
		DDRB = 0b00100111;
		DDRC = 0b00011111;
		DDRD = 0b11101111;
	}
	
	//LCD timers
	{
		TCCR0A = 0b00000010;	//resets on match with A
		TCCR0B = 0b00000011;	//clocked at Fosc/64 = 125KHz, thus total refresh cyclec on screen is 40Hz
		TIMSK0 = 0b00000010;	//interrup ton match with A
	}
}

/*
	Timer 0 compare-match-A interrupt: display refresh.

	Each call outputs one element (one row of one brightness plane) of the
	buffer gBufAddr points at, advances gBufPos to the next element, and
	picks the compare value for the timer from the plane the new position
	lies in.
*/
ISR(TIMER0_COMPA_vect){
		
	register UInt8 vB, vC, vD;
	UInt8* ptr = gBufAddr + gBufPos;
	
	//fetch the three port values first so the port writes below are back to back
	vB = *ptr++;
	vC = *ptr++;
	vD = *ptr;
	
	//Ports C and D first get their "all columns disabled" value (which also
	//has no row bit set), then all three ports get the new element.
	//Presumably this avoids briefly showing a mix of old and new row/column
	//bits while the ports are updated one at a time - TODO confirm.
	PORTC = ALL_COLS_C;
	PORTD = ALL_COLS_D;
	PORTB = vB;
	PORTC = vC;
	PORTD = vD;
	
	//next element is 4 bytes on; wrap at 0x80 = 128 bytes, the size of one
	//buffer with the current BPP / HEIGHT / BYTES_PER_ELEM
	gBufPos = (gBufPos + 4) & 0x7F;
	//Bits 5 and 6 of the offset are the plane number (a plane is 32 bytes),
	//so the compare value is 0x1F, 0x3F, 0x5F or 0x7F for planes 0..3.
	//It is derived from the NEW position, i.e. the element shown next time.
	//NOTE(review): these values give plane times of roughly 1:2:3:4, not the
	//1:2:4:8 a binary 0..15 brightness would need - confirm this is intended.
	vB = 0x1F;
	if(gBufPos & 0x20) vB |= 0x20;
	if(gBufPos & 0x40) vB |= 0x40;
	OCR0A = vB;
}

/*
	Small pseudo-random generator: a 32-bit shift register that is shifted
	left one bit per call and XORed with a fixed constant whenever the bit
	shifted out was set. Returns the low 8 bits of the new state.

	The initial state is a multi-character constant, whose numeric value is
	implementation-defined.
*/
static UInt8 rand(void){
	
	static UInt32 seed = 'dgv3';
	const UInt32 feedback = (seed & 0x80000000) ? 0xD3451207 : 0x00;
	
	seed <<= 1;
	seed ^= feedback;
	
	return seed;	//truncated to the low byte
}

//Demo: every pixel gets a fresh random brightness every 200ms. Never returns.
static void __attribute__((noreturn)) demoRandom(void){
	
	UInt8 r, c;
	
	while(1){
		clearScreen();
		for(r = 0; r < HEIGHT; r++) for(c = 0; c < WIDTH; c++) setPix(r, c, rand() ^ rand());
		swapBufs();
		_delay_ms(200);
	}
}

/*
	Demo: fire. Never returns.

	scn holds a brightness (0..15) per pixel. Each frame, every row except
	the last is recomputed from the row after it (columns wrap around), and
	the last row is re-seeded: sometimes with a new random value, otherwise
	it fades by one step.
*/
static void __attribute__((noreturn)) demoFire(void){
	
	UInt8 scn[HEIGHT][WIDTH] = {{0,},};
	UInt8 r, c, t;
	
	clearScreen();
	swapBufs();
	
	while(1){
		//advance fire: new = (2 * below + self / 2 + belowLeft / 2 + belowRight / 2) / 4
		for(r = 0; r < HEIGHT - 1; r++) for(c = 0; c < WIDTH; c++){
			
			t = scn[r + 1][c] << 1;
			t += scn[r][c] >> 1;
			t += (c ? scn[r + 1][c - 1] : scn[r + 1][WIDTH - 1]) >> 1;
			t += (c == WIDTH - 1 ? scn[r + 1][0] : scn[r + 1][c + 1]) >> 1;
			t >>= 2;
			scn[r][c] = t;
			setPix(r, c, t);
		}
		
		//generate random seeds on the bottom
		for(c = 0; c < WIDTH; c++) {
			
			t = (rand() > 0xB0) ? rand() & 0x0F : (scn[HEIGHT - 1][c] ? scn[HEIGHT - 1][c] - 1 : 0);
			
			scn[HEIGHT - 1][c] = t;
			setPix(HEIGHT - 1, c, t);
		}
		swapBufs();
		_delay_ms(110);
	}
}

#define ZOOM		1	//life cells per display pixel, in each direction
#define DISP_MUL	8	//brightness added per live cell inside a display pixel

/*
	Demo: Conway's game of life on a wrap-around field. Never returns.

	While a generation is being computed, bit 0 of each cell is its current
	state and bit 1 collects its next state; afterwards every cell is
	shifted right by one so the next state becomes the current one.
*/
static void __attribute__((noreturn)) demoLife(void){
	
	UInt8 scn[HEIGHT * ZOOM][WIDTH * ZOOM] = {{0,},};
	Int8 r, c, t, dx, dy, er, ec;
	
	clearScreen();
	swapBufs();
	
	//random starting population
	for(r = 0; r < HEIGHT * ZOOM; r++) for(c = 0; c < WIDTH * ZOOM; c++) scn[r][c] = rand() > 0xb0 ? 1 : 0;	//0xb0!?!? Ha, magic number(s) galore!
	
	while(1){
		
		for(r = 0; r < HEIGHT * ZOOM; r++) for(c = 0; c < WIDTH * ZOOM; c++){
			
			//count live neighbours (current state only), wrapping at the edges
			t = 0;
			for(dx = -1; dx <= 1; dx++) for(dy = -1; dy <= 1; dy++) if(dx || dy){
				
				er = r + dy;
				ec = c + dx;
				
				if(er < 0) er = (HEIGHT * ZOOM) - 1;
				if(ec < 0) ec = (WIDTH * ZOOM) - 1;
				if(er == (HEIGHT * ZOOM)) er = 0;
				if(ec == (WIDTH * ZOOM)) ec = 0;
				
				t += scn[er][ec] & 1;
			}
			
			//rules of life are cruel...
			if(t == 2) t = scn[r][c];	//two neighbours: cell keeps its state (bit 1 of this cell is still clear here)
			else if(t == 3) t = 1;		//three neighbours: cell is alive
			else t = 0;			//anything else: cell is dead
			
			scn[r][c] |= t << 1;
		}
		
		//next state becomes current state
		for(r = 0; r < HEIGHT * ZOOM; r++) for(c = 0; c < WIDTH * ZOOM; c++) scn[r][c] >>= 1;
		
		//draw: each display pixel shows the number of live cells in its ZOOM x ZOOM square
		for(r = 0, er = 0; r < HEIGHT * ZOOM; r += ZOOM, er++) for(c = 0, ec = 0; c < WIDTH * ZOOM; c += ZOOM, ec++){
			
			t = 0;
			
			for(dx = 0; dx < ZOOM; dx++) for(dy = 0; dy < ZOOM; dy++) t += scn[r + dy][c + dx];
			
			setPix(er, ec, t * DISP_MUL);
		}
		
		swapBufs();
		
		_delay_ms(100);
	}
}

/*
	Entry point: set up the hardware, show a blank screen, enable
	interrupts and run one demo forever.

	Which demo runs is chosen at compile time below. The ones under if(0)
	are still compiled (so they keep building) but are never executed.
*/
int __attribute__((noreturn)) main(void){
	init();
	gBufPos = 0;
	OCR0A = 0x7F;
	clearScreen();
	swapBufs();
	sei();
	
	if(0) demoRandom();
	if(0) demoLife();
	
	demoFire();
}