[WIP][SBDL] Mot3d ported from Pico 8

I ported Tom Mulgrew’s Mot3d Pico 8 cart to C/C++ and then used SBDL as frontend for rendering.

SkyBerron Mot3d.bin.1_speed

SkyBerron Mot3d.bin.2_speed

I also added optional depth based lighting:

SkyBerron Mot3d.bin.3_speed

SkyBerron Mot3d.bin (90.7 KB) (first version, very slow)

SkyBerron Mot3d.bin (92.4 KB) (second version, a bit faster)

SkyBerron Mot3d v3.bin (95.0 KB) (third version, improved framerate, floor and ceiling pixel doubling, reduced overdrawing, fixed bugs, fixed field of view)

SkyBerron Mot3d.bin.2

Source code ported to C/C++:

#include <stdint.h>

// ----- BEGIN UTILITY -----


/* Target screen size in pixels, and the coordinates of its centre. */
#define SCR_WIDTH       220
#define SCR_HEIGHT      176
#define SCR_HALF_WIDTH  (SCR_WIDTH>>1)
#define SCR_HALF_HEIGHT (SCR_HEIGHT>>1)

/*
 * Signed 16.16 fixed point stored in an int32_t: the upper FP_SHIFT bits hold
 * the integer part, the lower FP_SHIFT bits hold the fraction.
 */
#define FP_SHIFT        16
#define FP_HALF_SHIFT   (FP_SHIFT>>1)
/* Keeps the integer bits only; derived from FP_SHIFT so the two cannot drift apart. */
#define FP_MASK         (0xFFFFFFFFu<<FP_SHIFT)
/* float -> fixed point (truncates toward zero). */
#define FTOFP(A)        ((int32_t)((A)*(1<<FP_SHIFT)))
/* fixed point -> float. */
#define FPTOF(A)        (((float)(A))/(1<<FP_SHIFT))
/*
 * integer -> fixed point. Implemented as a multiplication because left-shifting
 * a negative value is undefined behaviour, and this macro is used with
 * negative arguments (e.g. ITOFP(-1)).
 */
#define ITOFP(A)        (((int32_t)(A))*(1<<FP_SHIFT))
/*
 * fixed point -> integer. Right-shifting a negative value is
 * implementation-defined; this assumes an arithmetic shift, i.e. rounding
 * toward negative infinity.
 */
#define FPTOI(A)        ((A)>>FP_SHIFT)

/* Sign of a as fixed-point -1.0 or +1.0; zero is treated as positive. */
int32_t fpsgn( int32_t a ) {
    if( a < 0 ) {
        return( ITOFP(-1) );
    }
    return( ITOFP(1) );
}
/* Absolute value of a fixed-point number. */
int32_t fpabs( int32_t a ) {
    if( a < 0 ) {
        return( -a );
    }
    return( a );
}
/* Clears the fractional bits of a, i.e. rounds toward negative infinity. */
int32_t fpflr( int32_t a ) {
    const uint32_t bits = (uint32_t) a;
    return( (int32_t) ( bits & FP_MASK ) );
}
/* Rounds a up to a whole number, computed as -floor(-a). */
int32_t fpceil( int32_t a ) {
    const int32_t floor_of_negated = fpflr( -a );
    return( -floor_of_negated );
}
/* Larger of two fixed-point values. */
int32_t fpmax( int32_t a, int32_t b ) {
    if( a > b ) {
        return( a );
    }
    return( b );
}
/* Smaller of two fixed-point values. */
int32_t fpmin( int32_t a, int32_t b ) {
    if( a < b ) {
        return( a );
    }
    return( b );
}
/*
 * Fixed-point multiplication. The product is formed in 64 bits so the
 * intermediate cannot overflow, then scaled back down by FP_SHIFT.
 */
int32_t fpmul( int32_t a, int32_t b ) {
    const int64_t wide = (int64_t) a * b;
    return( (int32_t) ( wide >> FP_SHIFT ) );
}
/*
 * "Exact" fixed-point division a / b using a 64-bit intermediate.
 *
 * Division by zero saturates to +/-0x7FFFFFFF (sign taken from a), the
 * behaviour PICO-8 documents for its own 16.16 numbers, instead of executing
 * an undefined integer division by zero.
 */
int32_t fpdiv2( int32_t a, int32_t b ) {
    if( b == 0 ) {
        return( a < 0 ? -0x7FFFFFFF : 0x7FFFFFFF );
    }
    /* Scale by multiplying: left-shifting a negative value is undefined behaviour. */
    const int64_t scaled = (int64_t) a * ( (int64_t) 1 << FP_SHIFT );
    return( (int32_t) ( scaled / b ) );
}

/*
 * Fast, reduced-accuracy fixed-point division a / b using 32-bit arithmetic only.
 *
 * The dividend magnitude is shifted left as far as it will go (at most
 * FP_SHIFT bits); whatever part of the FP_SHIFT scaling could not be applied
 * before the division is applied to the quotient afterwards, which costs
 * low-order precision.
 *
 * Division by zero and quotients that do not fit saturate to +/-0x7FFFFFFF
 * (PICO-8 documents the same result for division by zero) instead of
 * dividing by zero or wrapping around.
 */
int32_t fpdiv( int32_t a, int32_t b ) {
    const int negative = ( a < 0 ) != ( b < 0 );
    /* Negate in unsigned arithmetic so that INT32_MIN does not overflow. */
    uint32_t aa = ( a < 0 ) ? ( 0u - (uint32_t) a ) : (uint32_t) a;
    const uint32_t bb = ( b < 0 ) ? ( 0u - (uint32_t) b ) : (uint32_t) b;
    uint32_t q = 0x7FFFFFFFu;

    if( bb != 0 ) {
        uint8_t ishift = FP_SHIFT;
        while( ishift > 0 && aa < 0x70000000 ) {
            ishift--;
            aa <<= 1;
        }
        q = aa / bb;
        if( q > ( 0x7FFFFFFFu >> ishift ) ) {
            q = 0x7FFFFFFFu;            /* result would overflow: saturate */
        } else {
            q <<= ishift;
        }
    }
    return( negative ? -(int32_t) q : (int32_t) q );
}


// ----- END UTILITY -----


//-- mot3d
//-- by mot
//
//-- a simple wolfenstein 3d type
//-- ray-casting engine
//-->8
//-- settings/globals
//
//-- constants
//sdist=100	-- screen dist for 3d projection, measured in pixels
//crad=.15		-- camera radius for collision detection

// Screen distance for the 3d projection, in pixels. The Pico-8 original used
// 100 (see the Lua note above); NOTE(review): 160 was presumably chosen for
// the wider SCR_WIDTH - confirm.
const int32_t g_csdist = FTOFP(160.0f);
// Camera radius for collision detection.
const int32_t g_crad = FTOFP(.15f);

//
//otyp={				-- object types
//-- y  h    w solid
//	{ .33,.4, .5,true},
//	{-.36,.25,.25,false},
//	{   0, 1, .3,true},
//	{ .45,.45,.6,false},
//	{  .3,.5, .7,true}
//}
//
// Per-type object properties, all in fixed point. Field order follows the
// Lua table columns "y h w solid" and is relied on by the g_otyp initialisers.
struct ObjectType {
    int32_t y;      // vertical offset; drawobjects() scales it and adds it to the screen centre
    int32_t h;      // height; scaled by the projection factor in drawobjects()
    int32_t w;      // width; scaled by the projection factor in drawobjects()
    bool solid;     // when true, iscol() treats the object as an obstacle
};

// Object type table, indexed by (map tile value - 16). Only five types are
// defined here, although init() accepts tile values 16..47.
const ObjectType g_otyp[] = {
    //       y             h            w        solid
    { FTOFP( .33f), FTOFP( .4f), FTOFP( .5f), true },
    { FTOFP(-.36f), FTOFP(.25f), FTOFP(.25f), false },
    { FTOFP(  .0f), FTOFP(1.0f), FTOFP( .3f), true },
    { FTOFP( .45f), FTOFP(.45f), FTOFP( .6f), false },
    { FTOFP(  .3f), FTOFP( .5f), FTOFP( .7f), true }
};

//-- globals
//cam={1.5,1.5}		-- camera position
//ang=.5         -- camera direction
//obj={}									-- array of objects
//
// Camera position in map cells (x, y), fixed point. Overwritten by init()
// when the map contains a start tile.
int32_t m_tcam[2] = { FTOFP(1.5f), FTOFP(1.5f) };
// Camera direction, fixed point; drawwalls() converts it to an isin() index
// such that 1.0 corresponds to 1024 steps.
int32_t m_ang = FTOFP(.5f);
// m_ca / m_sa: cosine and sine of m_ang, refreshed by drawwalls().
// m_dx / m_dy: rotated ray direction of the column drawwalls() processed last.
int32_t m_ca, m_sa, m_dx, m_dy;

// Capacity of the object pool; init() ignores objects beyond this count.
#define MAX_OBJECT_COUNT    128
// One object instance placed on the map.
struct ObjectData {
    int32_t pos[2];     // map position (x, y), fixed point
    int32_t typ;        // object type: map tile value - 16
    int32_t rel[2];     // camera-relative position, recomputed by drawobjects(); rel[1] is the depth
};
// Object storage.
ObjectData m_tobj[MAX_OBJECT_COUNT];
// Pointers into m_tobj; drawobjects() reorders these pointers by depth
// instead of moving the objects themselves.
ObjectData *m_tpobj[MAX_OBJECT_COUNT];
// Number of valid entries in m_tpobj.
int32_t m_nobj = 0;

//-- working
//zbuf={}        -- z-buffer. 1 entry per screen column.
//colsx,colsy={},{}
//ix,iy=0,0      -- input axes
//
// Per-screen-column results of the wall ray-cast, written by drawcol():
// wall depth (z-buffer) and the source x / y of the wall texture column.
int32_t m_zbuf[SCR_WIDTH];
int32_t m_colsx[SCR_WIDTH];
int32_t m_colsy[SCR_WIDTH];
// Smoothed input axes, updated by updateinput(): m_ix turns, m_iy moves.
int32_t m_ix = 0;
int32_t m_iy = 0;
// Largest wall depth of the current frame; reset in calc_draw().
int32_t m_maxz;

//-->8
//-- initialisation
//
//function _init()
// Initialises the engine state from the map (Pico-8: _init()).
//
// Scans the 128x32 map for the camera start tile and for objects, fills the
// object list and clears the per-column buffers. Safe to call more than once:
// the object list is emptied first.
void init() {
    // Start from an empty object list; without this reset a second call
    // would append another copy of every object.
    m_nobj = 0;
    for( int32_t i = 0; i < MAX_OBJECT_COUNT; i++ ) {
        m_tpobj[i] = &m_tobj[i];
    }

    for( int32_t y = 0; y < 32; y++ ) {
        for( int32_t x = 0; x < 128; x++ ) {
            const int32_t m = mget( ITOFP( x ), ITOFP( y ) );
            if( m >= 1 && m <= 4 ) {
                // Camera start point: centre of the cell. The tile index
                // selects the start direction (Pico-8: ang=(m-1)*-.25+.5).
                m_tcam[0] = ITOFP(x) + FTOFP(.5f);
                m_tcam[1] = ITOFP(y) + FTOFP(.5f);
                m_ang = ( m - 1 ) * FTOFP(-.25f) + FTOFP(.5f);
            } else if( m >= 16 && m < 48 && m_nobj < MAX_OBJECT_COUNT ) {
                // Object tile: place the object at the centre of the cell.
                // Objects beyond the pool capacity are silently dropped.
                ObjectData &ob = *m_tpobj[m_nobj];
                ob.pos[0] = ITOFP(x) + FTOFP(.5f);
                ob.pos[1] = ITOFP(y) + FTOFP(.5f);
                ob.typ = m - 16;
                ob.rel[0] = 0;
                ob.rel[1] = 0;
                m_nobj++;
            }
        }
    }

    // Clear the per-column buffers.
    for( int32_t i = 0; i < SCR_WIDTH; i++ ) {
        m_zbuf[i] = 0;
        m_colsx[i] = 0;
        m_colsy[i] = 0;
    }
}
//
//-->8
//-- rendering
//
//function _draw()
// First half of the Pico-8 _draw(): computes the wall columns without
// drawing anything. drawwalls() also accumulates the largest wall depth in
// m_maxz, which tells drawceilingfloor() how much ceiling and floor to draw.
void calc_draw() {
    m_maxz = 0;     // Pico-8: maxz=0
    drawwalls();    // Pico-8: drawwalls()
}

// Second half of the Pico-8 _draw(): renders one frame from the data that
// calc_draw() prepared. The order is ceiling and floor first, then the wall
// columns, then the objects.
// (The Pico-8 original also printed the cpu load here; that is not ported.)
void draw() {
    drawceilingfloor();     // Pico-8: drawceilingfloor()
    drawcols();             // Pico-8: drawcols()
    drawobjects();          // Pico-8: drawobjects()
}
//
//function drawwalls()
// Casts one ray per screen column and records the wall it hits via drawcol()
// (Pico-8: drawwalls()). Nothing is drawn here. Also refreshes m_sa / m_ca
// from m_ang and leaves the last column's ray direction in m_dx / m_dy.
void drawwalls() {
    // isin( x ): period 1024 => [-256, 256] (per the original note). The
    // angle is reduced to that index range and the result is scaled by 256
    // so that 256 becomes fixed-point 1.0.
    const int32_t angle_index = m_ang >> ( FP_SHIFT - 10 );
    m_sa = isin( angle_index ) << 8;
    m_ca = isin( angle_index + 256 ) << 8;
    const int32_t sin_a = m_sa;
    const int32_t cos_a = m_ca;

    for( int32_t col = 0; col < SCR_WIDTH; col++ ) {
        // Ray direction in camera space (Pico-8: dx,dy=64-i,sdist) ...
        const int32_t ray_x0 = ITOFP( SCR_HALF_WIDTH - col );
        const int32_t ray_y0 = g_csdist;

        // ... rotated by the camera angle.
        m_dx = fpmul( cos_a, ray_x0 ) - fpmul( sin_a, ray_y0 );
        m_dy = fpmul( cos_a, ray_y0 ) + fpmul( sin_a, ray_x0 );
        int32_t dx = m_dx;
        int32_t dy = m_dy;

        // Tweak the ray to avoid a divide by 0.
        if( fpabs( dx ) < FTOFP(.001f) ) {
            dx = FTOFP(.001f);
        }
        if( fpabs( dy ) < FTOFP(.001f) ) {
            dy = FTOFP(.001f);
        }

        // Horizontal traversal: one cell per step in x.
        const int32_t hdx = fpsgn( dx );
        const int32_t hdy = fpdiv( dy, fpabs( dx ) );
        const int32_t hdz = fpmul( hdy, cos_a ) - fpmul( hdx, sin_a );
        // Move to the next horizontal boundary.
        int32_t hstep = m_tcam[0] - fpflr( m_tcam[0] );
        if( hdx > 0 ) {
            hstep = ITOFP(1) - hstep;
        }
        int32_t hx = m_tcam[0] + fpmul( hdx, hstep );
        int32_t hy = m_tcam[1] + fpmul( hdy, hstep );
        int32_t hz = fpmul( hdz, hstep );

        // Vertical traversal: one cell per step in y.
        const int32_t vdx = fpdiv( dx, fpabs( dy ) );
        const int32_t vdy = fpsgn( dy );
        const int32_t vdz = fpmul( vdy, cos_a ) - fpmul( vdx, sin_a );
        // Move to the next vertical boundary.
        int32_t vstep = m_tcam[1] - fpflr( m_tcam[1] );
        if( vdy > 0 ) {
            vstep = ITOFP(1) - vstep;
        }
        int32_t vx = m_tcam[0] + fpmul( vdx, vstep );
        int32_t vy = m_tcam[1] + fpmul( vdy, vstep );
        int32_t vz = fpmul( vdz, vstep );

        // Pico-8 used an unbounded goto loop; here the search gives up after
        // 100 boundary steps.
        for( int32_t steps_left = 100; steps_left > 0; steps_left-- ) {
            if( hz < vz ) {
                // The nearest boundary is a horizontal one: probe the cell
                // half a step beyond it.
                const int32_t m = mget( hx + ( hdx >> 1 ), hy );
                if( m >= 48 ) {
                    drawcol( m, hy - fpflr( hy ), hz, col );
                    break;
                }
                hx += hdx;
                hy += hdy;
                hz += hdz;
            } else {
                // The nearest boundary is a vertical one.
                const int32_t m = mget( vx, vy + ( vdy >> 1 ) );
                if( m >= 48 ) {
                    drawcol( m, vx - fpflr( vx ), vz, col );
                    break;
                }
                vx += vdx;
                vy += vdy;
                vz += vdz;
            }
        }
    }
}
//
//function drawcol(m,tx,z,x)
// Records one wall column for later drawing by drawcols()
// (Pico-8: drawcol(m,tx,z,x)).
//   m  = map value of the wall cell (48 and above)
//   tx = "texture" x coordinate within the cell (0-1), fixed point
//   z  = z distance, fixed point
//   x  = screen column
// Also folds z into m_maxz.
void drawcol( int32_t m, int32_t tx, int32_t z, int32_t x ) {
    const int32_t wall = m - 48;

    // Pico-8: sx=((m%4)+tx)*32, sy=flr(m/4)*32+32
    m_colsx[x] = ( ITOFP( wall % 4 ) + tx ) * 32;
    m_colsy[x] = ITOFP( ( wall / 4 ) * 32 + 32 );
    m_zbuf[x] = z;
    m_maxz = fpmax( m_maxz, z );
}
//
//function drawcols()
void drawcols() {
    palt( 0, false );
// for x=0,127 do
    for( int32_t x = 0; x < SCR_WIDTH; x++ ) {
//  local h=sdist/zbuf[x+1]
//  local y0,y1=ceil(64-h/2),ceil(64+h/2)
//	 sspr(colsx[x+1],colsy[x+1],1,32,x,y0,1,y1-y0)
        int32_t h = fpdiv( g_csdist, m_zbuf[x] );
        int32_t y0 = fpceil( ITOFP(SCR_HALF_HEIGHT) - ( h >> 1 ) ), y1 = fpceil( ITOFP(SCR_HALF_HEIGHT) + ( h >> 1 ) );
        int32_t sx = m_colsx[x], sy = m_colsy[x], z = m_zbuf[x];
        set_depth( z );
        sspr( sx, sy, ITOFP(1), ITOFP(32), ITOFP( x ), y0, ITOFP(1), y1 - y0 );
// end
//end
    }
}
//
//function drawobjects()
void drawobjects() {
// palt(0,true)
    palt( 0, true );
//
//	-- draw objects
// local sa=sin(-ang)
// local ca=cos(-ang)
// local r={0,0}
    int32_t sa = -m_sa, ca = m_ca;
    int32_t r[2] = { 0, 0 };
// for i=1,#obj do
    for( int32_t i = 0; i < m_nobj; i++ ) {
//  local ob=obj[i]
        ObjectData *pob = m_tpobj[i];
//	 r[1]=ob.pos[1]-cam[1]
//	 r[2]=ob.pos[2]-cam[2]
        r[0] = pob->pos[0] - m_tcam[0];
        r[1] = pob->pos[1] - m_tcam[1];
//	 ob.rel[1]=-ca*r[1]-sa*r[2]
//	 ob.rel[2]=ca*r[2]-sa*r[1]
        pob->rel[0] = fpmul( sa, r[1] ) - fpmul( ca, r[0] );
        pob->rel[1] = fpmul( sa, r[0] ) + fpmul( ca, r[1] );
//  local j=i
//  while j>1 and obj[j-1].rel[2]<ob.rel[2] do
//   obj[j]=obj[j-1]
//   j-=1
//  end
//	 obj[j]=ob
//	end
        int32_t j = i;
        for( ; j > 0 && m_tpobj[j-1]->rel[1] < pob->rel[1]; j-- ) {
            m_tpobj[j] = m_tpobj[j-1];
        }
        m_tpobj[j] = pob;
    }
//
//	for ob in all(obj) do
    for( int32_t iobj = 0; iobj < m_nobj; iobj++ ) {
        ObjectData &ob = *m_tpobj[iobj];
//	 local z=ob.rel[2]
        int32_t z = ob.rel[1];
//	 if(z<0.01)goto objdone
        if( z < FTOFP(.01f) ) break;
//	 local t=otyp[ob.typ+1]
//	 local f=sdist/z
        const ObjectType &t = g_otyp[ob.typ];
        int32_t f = fpdiv( g_csdist, z );
//	 local x=ob.rel[1]*f+64
//	 local y=t[1]*f+64
        int32_t x = fpmul( ob.rel[0], f ) + ITOFP(SCR_HALF_WIDTH);
        int32_t y = fpmul( t.y, f ) + ITOFP(SCR_HALF_HEIGHT);
//	 local h=t[2]*f
//	 local w=t[3]*f
        int32_t h = fpmul( t.h, f );
        int32_t w = fpmul( t.w, f );
//	 local y0,y1=ceil(y-h/2),ceil(y+h/2)
        int32_t y0 = fpceil( y - ( h >> 1 ) ), y1 = fpceil( y + ( h >> 1 ) );
//	 local sx=(ob.typ%16)*16
        int32_t sx = ITOFP( ( ob.typ % 16 ) * 16 );
//	 local sy=flr(ob.typ/16)*16+96
        int32_t sy = ITOFP( ( ob.typ / 16 ) * 16 + 96 );
//	 local sxd=16/w
//	 local x0=x-w/2
//	 local x1=x+w/2
        int32_t sxd = fpdiv( ITOFP(16), w );
        int32_t x0 = x - ( w >> 1 );
        int32_t x1 = x + ( w >> 1 );
//	 sx+=(ceil(x0)-x0)*sxd
//	 x0=ceil(x0)
//	 x1=flr(x1)
        sx += fpmul( ( fpceil(x0) - x0 ), sxd );
        x0 = fpceil( x0 );
        x1 = fpflr( x1 );
//	 if x0<0 then
//	  sx-=sxd*x0
//	 	x0=0
//	 end
        if( x0 < 0 ) {
            sx -= fpmul( sxd, x0 );
            x0 = 0;
        }
//	 if(x1>127)x1=127
        if( x1 > ITOFP(SCR_WIDTH-1) ) x1 = ITOFP(SCR_WIDTH-1);
//	 if x1>=x0 then
//	  for x=x0,x1 do
//			 if z<zbuf[x+1] then
//			  sspr(sx,sy,1,16,x,y0,1,y1-y0)
//			 end
//			 sx+=sxd
//	  end
//	 end
        if( x1 >= x0 ) {
            x0 = FPTOI(x0);
            x1 = FPTOI(x1);
            for( int32_t x = x0; x <= x1; x++ ) {
                if( z < m_zbuf[x] ) {
                    set_depth( z );
                    sspr( sx, sy, ITOFP(1), ITOFP(16), ITOFP(x), y0, ITOFP(1), y1 - y0 );
                }
                sx += sxd;
            }
        }
//	end
//::objdone::
//end
    }
}
//
//function drawceilingfloor()
// Draws the textured ceiling and floor (Pico-8: drawceilingfloor()).
// Only rows outside the projected height of the farthest wall (m_maxz) are
// drawn.
void drawceilingfloor() {
    palt( 0, false );

    // Pico-8: h=sdist/maxz. When no wall was hit, m_maxz is 0; the largest
    // fixed-point value is used instead of dividing by zero, which leaves
    // both row ranges below empty (PICO-8 documents that result for a
    // division by zero).
    const int32_t h = ( m_maxz > 0 ) ? fpdiv( g_csdist, m_maxz ) : 0x7FFFFFFF;
    const int32_t half = fpceil( h >> 1 );

    // Ceiling: rows from the top of the screen down to -ceil(h/2).
    // NOTE(review): the pokes are carried over from the Pico-8 cart and
    // presumably configure the map area sampled by tline() - confirm.
    poke( ITOFP(0x5f38), ITOFP(4) );
    poke( ITOFP(0x5f39), ITOFP(4) );
    poke( ITOFP(0x5f3a), ITOFP(124) );
    poke( ITOFP(0x5f3b), ITOFP(4) );
    {
        const int32_t y0 = -SCR_HALF_HEIGHT;
        const int32_t y1 = FPTOI( -half );
        for( int32_t y = y0; y <= y1; y++ ) {
            drawrow( ITOFP( y ), FTOFP(.5f) );
        }
    }

    // Floor: rows from ceil(h/2) down to the bottom of the screen.
    poke( ITOFP(0x5f38), ITOFP(4) );
    poke( ITOFP(0x5f39), ITOFP(4) );
    poke( ITOFP(0x5f3a), ITOFP(124) );
    poke( ITOFP(0x5f3b), ITOFP(0) );
    {
        const int32_t y0 = FPTOI( half );
        const int32_t y1 = SCR_HALF_HEIGHT - 1;
        for( int32_t y = y0; y <= y1; y++ ) {
            drawrow( ITOFP( y ), FTOFP(.25f) );
        }
    }
}
//
//function drawrow(y,tilesize)
// Draws one textured ceiling or floor row (Pico-8: drawrow(y,tilesize)).
//   y        = screen row relative to the screen centre, fixed point
//   tilesize = size of one texture tile in map cells, fixed point
void drawrow( int32_t y, int32_t tilesize ) {
    int32_t sa = m_sa, ca = m_ca;

    // Gradient of the floor ray (Pico-8: g=abs(y)/sdist).
    int32_t g = fpdiv( fpabs( y ), g_csdist );
    // A zero gradient (the horizon row) would make the distance below a
    // division by zero, so there is nothing to draw for it.
    if( g == 0 ) return;

    // z distance to the intersection (Pico-8: z=0.5/g).
    int32_t z = fpdiv( FTOFP(.5f), g );
    set_depth( z );

    // Map coordinates for the centre of the row, in tile units.
    int32_t mx = fpdiv( m_tcam[0] - fpmul( z, sa ), tilesize );
    int32_t my = fpdiv( m_tcam[1] + fpmul( z, ca ), tilesize );

    // Scale, and the map step per screen pixel (Pico-8: s=sdist/z*tilesize).
    int32_t s = fpmul( fpdiv( g_csdist, z ), tilesize );
    int32_t mdx = -fpdiv( ca, s ), mdy = -fpdiv( sa, s );

    // Move from the centre of the row to its left edge.
    mx -= SCR_HALF_WIDTH * mdx;
    my -= SCR_HALF_WIDTH * mdy;

    // Draw the row across the full screen width.
    tline( 0, y + ITOFP(SCR_HALF_HEIGHT), ITOFP(SCR_WIDTH), y + ITOFP(SCR_HALF_HEIGHT), mx, my, mdx, mdy );
}
//-->8
//-- gameplay
//
//function _update60()
void update60() {
//
// -- move/rotate camera
// updateinput()
    updateinput();
// local v={sin(ang)*-iy*0.04,cos(ang)*-iy*0.04}
    int32_t v[2];
    int32_t sa = m_sa, ca = m_ca;
    v[0] = fpmul( fpmul( sa, m_iy ), FTOFP(.04f) );
    v[1] = fpmul( fpmul( ca, -m_iy ), FTOFP(.04f) );
//	ang+=ix*0.0075
    m_ang += fpmul( m_ix, FTOFP(.0075f) );
//
// -- collision detection and sliding logic
// if not iscol({cam[1]+v[1],cam[2]+v[2]},crad) then
// 	cam[1]+=v[1]
// 	cam[2]+=v[2]
// elseif not iscol({cam[1]+v[1],cam[2]},crad) then
// 	cam[1]+=v[1]
// elseif not iscol({cam[1],cam[2]+v[2]},crad) then
//  cam[2]+=v[2]
// end
    if( !iscol( m_tcam[0] + v[0], m_tcam[1] + v[1], g_crad ) ) {
        m_tcam[0] += v[0];
        m_tcam[1] += v[1];
    } else if( !iscol( m_tcam[0] + v[0], m_tcam[1], g_crad ) ) {
        m_tcam[0] += v[0];
    } else if( !iscol( m_tcam[0], m_tcam[1] + v[1], g_crad ) ) {
        m_tcam[1] += v[1];
    } else {
        m_iy = 0;
    }
    calc_draw();
//end
}
//
//-- check for collision
//-- p = point32_t to check
//-- r = radius
//function iscol(p,r)
bool iscol( int32_t px, int32_t py, int32_t r ) {
// for x=flr(p[1]-r),flr(p[1]+r) do
//  for y=flr(p[2]-r),flr(p[2]+r) do
    int32_t x0 = FPTOI( fpflr( px - r ) );
    int32_t x1 = FPTOI( fpflr( px + r ) );
    int32_t y0 = FPTOI( fpflr( py - r ) );
    int32_t y1 = FPTOI( fpflr( py + r ) );
    for( int32_t x = x0; x <= x1; x++ ) {
        for( int32_t y = y0; y <= y1; y++ ) {
//   local m=mget(x,y)
            int32_t m = mget( ITOFP( x ), ITOFP( y ) );
//   if(m>=48)return true
            if( m >= 48 ) return( true );
//   if m>=16 then
//    if otyp[m-15][4] then
            if( m >= 16 ) {
                const ObjectType &t = g_otyp[m-16];
                if( t.solid ) {
//	    local o={x+.5-p[1],y+.5-p[2]}
// 	   local r2=o[1]*o[1]+o[2]*o[2]
//  	  if(r2<.15)return true
                    int32_t ox = ITOFP(x) + FTOFP(.5f) - px;
                    int32_t oy = ITOFP(y) + FTOFP(.5f) - py;
                    int32_t r2 = fpmul( ox, ox ) + fpmul( oy, oy );
                    if( r2 < FTOFP(.15f) ) return( true );
//  	 end
//   end
//  end
// end
// return false
//end
                }
            }
        }
    }
    return( false );
}
//
//function updateinput()
// Samples the d-pad and low-pass filters it into the input axes
// (Pico-8: updateinput()): m_ix turns, m_iy moves.
void updateinput() {
    int32_t turn = 0;
    int32_t move = 0;

    if( pressed( DEVICE_KEY_LEFT ) ) {
        turn -= ITOFP(1);
    }
    if( pressed( DEVICE_KEY_RIGHT ) ) {
        turn += ITOFP(1);
    }
    if( pressed( DEVICE_KEY_UP ) ) {
        move -= ITOFP(1);
    }
    if( pressed( DEVICE_KEY_DOWN ) ) {
        move += ITOFP(1);
    }

    // Pico-8: ix=ix*.875+x*.125, iy=iy*.9+y*.1
    m_ix = fpmul( m_ix, FTOFP(.875f) ) + fpmul( turn, FTOFP(.125f) );
    m_iy = fpmul( m_iy, FTOFP(.9f) ) + fpmul( move, FTOFP(.1f) );

    // Snap very small values to zero (not part of the Pico-8 original).
    if( fpabs( m_ix ) < FTOFP(.001f) ) {
        m_ix = 0;
    }
    if( fpabs( m_iy ) < FTOFP(.001f) ) {
        m_iy = 0;
    }
}

7 Likes

But it looks so good! Do you have tricks to speed it up?

1 Like

Hi, Filmote! The profiler says the fixed point division is pretty slow, so I have replaced the ‘exact’ one, which requires 64 bit integers, with a less accurate one of my own that uses only 32 bit integers:

/*
 * Fast, reduced-accuracy fixed-point division a / b using 32-bit arithmetic only.
 *
 * The dividend magnitude is shifted left as far as it will go (at most
 * FP_SHIFT bits); whatever part of the FP_SHIFT scaling could not be applied
 * before the division is applied to the quotient afterwards, which costs
 * low-order precision.
 *
 * Division by zero and quotients that do not fit saturate to +/-0x7FFFFFFF
 * (PICO-8 documents the same result for division by zero) instead of
 * dividing by zero or wrapping around.
 */
int32_t fpdiv( int32_t a, int32_t b ) {
    const int negative = ( a < 0 ) != ( b < 0 );
    /* Negate in unsigned arithmetic so that INT32_MIN does not overflow. */
    uint32_t aa = ( a < 0 ) ? ( 0u - (uint32_t) a ) : (uint32_t) a;
    const uint32_t bb = ( b < 0 ) ? ( 0u - (uint32_t) b ) : (uint32_t) b;
    uint32_t q = 0x7FFFFFFFu;

    if( bb != 0 ) {
        uint8_t ishift = FP_SHIFT;
        while( ishift > 0 && aa < 0x70000000 ) {
            ishift--;
            aa <<= 1;
        }
        q = aa / bb;
        if( q > ( 0x7FFFFFFFu >> ishift ) ) {
            q = 0x7FFFFFFFu;            /* result would overflow: saturate */
        } else {
            q <<= ishift;
        }
    }
    return( negative ? -(int32_t) q : (int32_t) q );
}

Less accuracy means that the edges between walls fail to render properly more often.

But the main bottleneck is the tline() drawing function that is used both for ceiling and floor. I have squeezed some speed from it, but I’m afraid I can’t optimize further without going into the marvellous world of ASM.

So I guess the new version is the fastest it can go full screen full resolution.

P.S. SBDL internally uses an 8bpp image buffer smaller than the full screen, because a full-screen buffer wouldn’t fit in RAM, so it takes several passes to render the full screen, and that also costs some performance.

Of course, some framerate improvement can be achieved by reducing visual quality. The most obvious way to do so is doubling pixels on ceiling and floor:

SkyBerron Mot3d.bin.1_speed

SkyBerron Mot3d pixel doubling.bin (91.5 KB)

8 Likes

I found some speed issues related to overdrawing and use of divs that could be optimized. Once you start optimizing code, you can’t stop! :sweat_smile: I also fixed some bugs and adjusted field of view as resolution is higher than Pico 8. I uploaded the new version (third version) to first post. It should run smoother now.

1 Like