The mixmode example would be fine for testing, failing that (I know not everyone has all of the examples) then this is what I’ve started…PokittOS.zip (423.2 KB)
It includes the scanline fix I mentioned in the previous comment and moves `scanlineIndex` outside the `y` loop.
Possibly an array out of bounds error.
What does it do on an actual Pokitto?
Half the reason I use PokittoIO is so I don’t have to download the 20-odd examples.
I get an undefined reference to `Pokitto::Display::scanType`, which means either `scanType` is something you’ve added locally or it’s in PokittoLib and not PokittoIO.
I also get a list of warnings:
The “array subscript is above array bounds” warnings are the most serious ones.
It’s in PokittoLib. It was added a while back with the mixmode example.
This seems to be working perfectly, a little faster than the original.
Do you think moving the palette shifting to the loadpalette functions would make any difference? I don’t see a need to do that every single frame?
// Forward declaration of the frame writer that takes the 8bpp palette as
// 16-bit colours and the 4bpp (16-entry) and 2bpp (4-entry) palettes as
// separate 32-bit arrays.
void lcdRefreshMixModeImplementation(const uint8_t screenBuffer[], const uint8_t scanTypes[], const uint16_t palette8bpp[], const uint32_t (&palette4bpp)[16], const uint32_t (&palette2bpp)[4]);
/// Refreshes the LCD in mix mode from a single combined 16-bit palette.
///
/// The combined palette is read as 256 8bpp colours, followed by 16 4bpp
/// colours, followed by 4 2bpp colours. The 4bpp and 2bpp colours are widened
/// to 32 bits and shifted left by 3 into local tables; the 8bpp colours are
/// handed on unchanged.
///
/// @param screenBuffer Packed pixel data, passed straight through.
/// @param palette      Combined palette; at least 256 + 16 + 4 entries are read.
/// @param scanTypes    Per-line scan types, passed straight through.
void Pokitto::lcdRefreshMixMode(const uint8_t * screenBuffer, const uint16_t * palette, const uint8_t * scanTypes)
{
    constexpr size_t entries8bpp = (1 << 8);
    constexpr size_t entries4bpp = (1 << 4);
    constexpr size_t entries2bpp = (1 << 2);

    // Sub-palettes sit back to back inside the combined palette.
    constexpr size_t start4bpp = entries8bpp;
    constexpr size_t start2bpp = start4bpp + entries4bpp;

    uint32_t shifted4bpp[entries4bpp];
    for(size_t i = 0; i < entries4bpp; ++i)
    {
        shifted4bpp[i] = static_cast<uint32_t>(palette[start4bpp + i]) << 3;
    }

    uint32_t shifted2bpp[entries2bpp];
    for(size_t i = 0; i < entries2bpp; ++i)
    {
        shifted2bpp[i] = static_cast<uint32_t>(palette[start2bpp + i]) << 3;
    }

    lcdRefreshMixModeImplementation(screenBuffer, scanTypes, palette, shifted4bpp, shifted2bpp);
}
/// Streams a complete frame (176 display lines of 220 pixels) to the LCD,
/// decoding every display line according to the scan type selected for it.
///
/// @param screenBuffer Packed source pixels. 8bpp lines are re-read from byte
///                     offset 110 * (y >> 1); 4bpp and 2bpp lines carry on
///                     from wherever the read pointer was left.
/// @param scanTypes    Indexed by (y >> 1), i.e. one entry per pair of display
///                     lines: 0 = 8bpp, 1 = 4bpp, 2 = 2bpp.
/// @param palette8bpp  16-bit colours for 8bpp lines; shifted left by 3 on lookup.
/// @param palette4bpp  16 colours for 4bpp lines, written to the LCD as-is.
/// @param palette2bpp  4 colours for 2bpp lines, written to the LCD as-is.
void lcdRefreshMixModeImplementation(const uint8_t screenBuffer[], const uint8_t scanTypes[], const uint16_t palette8bpp[], const uint32_t (&palette4bpp)[16], const uint32_t (&palette2bpp)[4])
{
    write_command(0x03);
    write_data(0x1038);
    // Horizontal DRAM Address
    write_command(0x20);
    write_data(0);
    // Vertical DRAM Address
    write_command(0x21);
    write_data(0);
    // write data to DRAM
    write_command(0x22);
    CLR_CS_SET_CD_RD_WR;
    SET_MASK_P2;

    constexpr size_t displayLines = 176;
    constexpr size_t scanlineWidth = 220;
    constexpr size_t bytesPerLinePair = 110;

    // One decoded display line plus ONE padding word. The write loop below
    // prefetches scanline[index + 1] after every pixel, so after the 220th
    // pixel it reads index 220; without the padding that is a read past the
    // end of the array. Zero-initialised so neither the padding word nor an
    // undecoded first line is ever read as an indeterminate value.
    uint32_t scanline[scanlineWidth + 1] = {};

    // Read position inside screenBuffer; persists across lines on purpose.
    const uint8_t * d = screenBuffer;
    for(size_t y = 0; y < displayLines; ++y)
    {
        // Two consecutive display lines share one scan type entry.
        const uint8_t scanTypeIndex = (y >> 1);
        uint16_t scanlineIndex = 0;
        switch(scanTypes[scanTypeIndex])
        {
            case 0: // 8bpp
            {
                // Both display lines of the pair decode the same 110 source
                // bytes, so rewind to the start of the pair each time.
                d = &screenBuffer[bytesPerLinePair * scanTypeIndex];
                // One source byte -> one colour, emitted twice.
#define STEP() \
                { \
                    const uint32_t color = static_cast<uint32_t>(palette8bpp[*d++]) << 3; \
                    scanline[scanlineIndex++] = color; \
                    scanline[scanlineIndex++] = color; \
                }
                // 11 * 10 bytes * 2 pixels = 220 pixels.
                for(uint8_t x = 0; x < 11; ++x)
                {
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                }
#undef STEP
                break;
            }
            case 1: // 4bpp
            {
                // One source byte -> two colours (high nibble first), each
                // emitted twice. The read pointer is NOT rewound here.
#define STEP() \
                { \
                    const uint8_t value = *d++; \
                    const uint32_t high = palette4bpp[((value >> 4) & 0x0F)]; \
                    const uint32_t low = palette4bpp[((value >> 0) & 0x0F)]; \
                    scanline[scanlineIndex++] = high; \
                    scanline[scanlineIndex++] = high; \
                    scanline[scanlineIndex++] = low; \
                    scanline[scanlineIndex++] = low; \
                }
                // 11 * 5 bytes * 4 pixels = 220 pixels.
                for(uint8_t x = 0; x < 11; ++x)
                {
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                }
#undef STEP
                break;
            }
            case 2: // 2bpp
            {
                // One source byte -> four colours, most significant pair
                // first. The read pointer is NOT rewound here.
#define STEP() \
                { \
                    const uint8_t value = *d++; \
                    scanline[scanlineIndex++] = palette2bpp[((value >> 6) & 0x03)]; \
                    scanline[scanlineIndex++] = palette2bpp[((value >> 4) & 0x03)]; \
                    scanline[scanlineIndex++] = palette2bpp[((value >> 2) & 0x03)]; \
                    scanline[scanlineIndex++] = palette2bpp[((value >> 0) & 0x03)]; \
                }
                // 11 * 5 bytes * 4 pixels = 220 pixels.
                for(uint8_t x = 0; x < 11; ++x)
                {
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                    STEP();
                }
#undef STEP
                break;
            }
            default:
                // Unknown scan type: nothing is decoded, so whatever is
                // already in scanline is sent again.
                break;
        }

        // Emit the current colour, then fetch the next one inside TGL_WR_OP.
#define WRITE_SCANLINE() \
        *LCD = color; \
        TGL_WR_OP(color = scanline[++writeIndex]);
#define WRITE_SCANLINE_2() \
        WRITE_SCANLINE(); \
        WRITE_SCANLINE();
#define WRITE_SCANLINE_10() \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2();
#define WRITE_SCANLINE_20() \
        WRITE_SCANLINE_10(); \
        WRITE_SCANLINE_10();
        {
            // Fixed address the pixel words are written to (presumably a
            // memory-mapped LCD data port -- confirm against the board headers).
            volatile uint32_t * LCD = reinterpret_cast< volatile uint32_t * >(0xA0002188);
            uint32_t color = scanline[0];
            size_t writeIndex = 0;
            // 11 * 20 = 220 pixel writes, fully unrolled.
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
        }
#undef WRITE_SCANLINE_20
#undef WRITE_SCANLINE_10
#undef WRITE_SCANLINE_2
#undef WRITE_SCANLINE
    }
    CLR_MASK_P2;
}
I added this to PokittoDisplay.h:
// Combined mix-mode palette: slots 0..255 hold the 8bpp colours, 256..271 the
// 4bpp colours and 272..275 the 2bpp colours (256 + 16 + 4 = 276). The loaders
// store every colour already shifted left by 3.
static uint32_t mixPalette[276];
then load my palette pre-shifted:
// Load the 8bpp palette: copies the 256 colours at p into mixPalette slots
// 0..255, each shifted left by 3, then points mixpaletteptr at mixPalette.
// p must reference at least 256 entries.
void load8bitPalette(const uint16_t* p) {
    // Exactly 256 entries. The previous bound (i <= 256) copied a 257th
    // colour, reading p[256] and overwriting slot 256, which is the first
    // 4bpp colour.
    for (int i = 0; i < 256; i++) game.display.mixPalette[i] = p[i] << 3;
    game.display.mixpaletteptr = game.display.mixPalette;
}
// Load the 4bpp palette: copies the 16 colours at p into mixPalette slots
// 256..271, each shifted left by 3, then points mixpaletteptr at mixPalette.
void load4bitPalette(const uint16_t* p) {
    constexpr int firstSlot = 256;
    constexpr int colourCount = 16;
    int n = 0;
    while (n < colourCount) {
        game.display.mixPalette[firstSlot + n] = p[n] << 3;
        ++n;
    }
    game.display.mixpaletteptr = game.display.mixPalette;
}
// Load the 2bpp palette: copies the 4 colours at p into mixPalette slots
// 272..275, each shifted left by 3, then points mixpaletteptr at mixPalette.
void load2bitPalette(const uint16_t* p) {
    constexpr int firstSlot = 272;
    constexpr int colourCount = 4;
    int n = 0;
    while (n < colourCount) {
        game.display.mixPalette[firstSlot + n] = p[n] << 3;
        ++n;
    }
    game.display.mixpaletteptr = game.display.mixPalette;
}
and remove the palette shifting from the display update…
/// Streams a complete frame (176 display lines of 220 pixels) to the LCD from
/// a single combined 32-bit palette whose colours are written out unmodified
/// (no shifting is done here, so the caller supplies them ready for the LCD).
///
/// @param screenBuffer Packed source pixels. 8bpp lines are re-read from byte
///                     offset 110 * (y >> 1); 4bpp and 2bpp lines carry on
///                     from wherever the read pointer was left.
/// @param palette8bpp  Combined palette: entries 0..255 are used for 8bpp
///                     lines, 256..271 for 4bpp lines, 272..275 for 2bpp lines.
/// @param scanTypes    Indexed by (y >> 1), i.e. one entry per pair of display
///                     lines: 0 = 8bpp, 1 = 4bpp, 2 = 2bpp.
void Pokitto::lcdRefreshMixMode(const uint8_t * screenBuffer, const uint32_t * palette8bpp, const uint8_t * scanTypes)
{
    write_command(0x03);
    write_data(0x1038);
    // Horizontal DRAM Address
    write_command(0x20);
    write_data(0);
    // Vertical DRAM Address
    write_command(0x21);
    write_data(0);
    // write data to DRAM
    write_command(0x22);
    CLR_CS_SET_CD_RD_WR;
    SET_MASK_P2;

    constexpr size_t displayLines = 176;
    constexpr size_t scanlineWidth = 220;
    constexpr size_t bytesPerLinePair = 110;
    // Where the 4bpp and 2bpp colours start inside the combined palette.
    constexpr size_t palette4bppOffset = 256;
    constexpr size_t palette2bppOffset = 272;

    // One decoded display line plus ONE padding word. The write loop below
    // prefetches scanline[index + 1] after every pixel, so after the 220th
    // pixel it reads index 220; without the padding that is a read past the
    // end of the array. Zero-initialised so neither the padding word nor an
    // undecoded first line is ever read as an indeterminate value.
    uint32_t scanline[scanlineWidth + 1] = {};

    // Read position inside screenBuffer; persists across lines on purpose.
    const uint8_t * d = screenBuffer;
    for(size_t y = 0; y < displayLines; ++y)
    {
        // Two consecutive display lines share one scan type entry.
        const uint8_t scanTypeIndex = (y >> 1);
        uint16_t scanlineIndex = 0;
        switch(scanTypes[scanTypeIndex])
        {
            case 0: // 8bpp
            {
                // Both display lines of the pair decode the same 110 source
                // bytes, so rewind to the start of the pair each time.
                d = &screenBuffer[bytesPerLinePair * scanTypeIndex];
                // One source byte -> one colour, emitted twice.
#define STEP() \
                { \
                    const uint32_t color = palette8bpp[*d++]; \
                    scanline[scanlineIndex++] = color; \
                    scanline[scanlineIndex++] = color; \
                }
#define STEP11() \
                { \
                    STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); \
                }
                // 10 * 11 bytes * 2 pixels = 220 pixels, fully unrolled.
                STEP11(); STEP11(); STEP11(); STEP11(); STEP11();
                STEP11(); STEP11(); STEP11(); STEP11(); STEP11();
#undef STEP11
#undef STEP
                break;
            }
            case 1: // 4bpp
            {
                // One source byte -> two colours (high nibble first), each
                // emitted twice. The read pointer is NOT rewound here.
#define STEP() \
                { \
                    const uint8_t value = *d++; \
                    const uint32_t color1 = palette8bpp[((value >> 4) & 0x0F) + palette4bppOffset]; \
                    const uint32_t color2 = palette8bpp[((value >> 0) & 0x0F) + palette4bppOffset]; \
                    scanline[scanlineIndex++] = color1; \
                    scanline[scanlineIndex++] = color1; \
                    scanline[scanlineIndex++] = color2; \
                    scanline[scanlineIndex++] = color2; \
                }
#define STEP11() \
                { \
                    STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); \
                }
                // 5 * 11 bytes * 4 pixels = 220 pixels, fully unrolled.
                STEP11(); STEP11(); STEP11(); STEP11(); STEP11();
#undef STEP11
#undef STEP
                break;
            }
            case 2: // 2bpp
            {
                // One source byte -> four colours, most significant pair
                // first. The read pointer is NOT rewound here.
#define STEP() \
                { \
                    const uint8_t value = *d++; \
                    scanline[scanlineIndex++] = palette8bpp[((value >> 6) & 0x03) + palette2bppOffset]; \
                    scanline[scanlineIndex++] = palette8bpp[((value >> 4) & 0x03) + palette2bppOffset]; \
                    scanline[scanlineIndex++] = palette8bpp[((value >> 2) & 0x03) + palette2bppOffset]; \
                    scanline[scanlineIndex++] = palette8bpp[((value >> 0) & 0x03) + palette2bppOffset]; \
                }
#define STEP11() \
                { \
                    STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); STEP(); \
                }
                // 5 * 11 bytes * 4 pixels = 220 pixels, fully unrolled.
                STEP11(); STEP11(); STEP11(); STEP11(); STEP11();
#undef STEP11
#undef STEP
                break;
            }
            default:
                // Unknown scan type: nothing is decoded, so whatever is
                // already in scanline is sent again.
                break;
        }

        // Emit the current colour, then fetch the next one inside TGL_WR_OP.
#define WRITE_SCANLINE() \
        *LCD = color; \
        TGL_WR_OP(color = scanline[++writeIndex]);
#define WRITE_SCANLINE_2() \
        WRITE_SCANLINE(); \
        WRITE_SCANLINE();
#define WRITE_SCANLINE_10() \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2(); \
        WRITE_SCANLINE_2();
#define WRITE_SCANLINE_20() \
        WRITE_SCANLINE_10(); \
        WRITE_SCANLINE_10();
        {
            // Fixed address the pixel words are written to (presumably a
            // memory-mapped LCD data port -- confirm against the board headers).
            volatile uint32_t * LCD = reinterpret_cast< volatile uint32_t * >(0xA0002188);
            uint32_t color = scanline[0];
            size_t writeIndex = 0;
            // 11 * 20 = 220 pixel writes, fully unrolled.
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
            WRITE_SCANLINE_20();
        }
#undef WRITE_SCANLINE_20
#undef WRITE_SCANLINE_10
#undef WRITE_SCANLINE_2
#undef WRITE_SCANLINE
    }
    CLR_MASK_P2;
}
I can’t help thinking that perhaps displaying more than one line at a time might be faster, especially as I’m splitting the screen into 88 lines for mode switching. However my attempts have resulted in garbled data.
Does anyone think it would help?