Improving FPS


#182

The mixmode example would be fine for testing; failing that (I know not everyone has all of the examples), this is what I’ve started: PokittOS.zip (423.2 KB)


#183

This includes the scanline fix I mentioned in the previous comment and moves scanlineIndex outside the y loop.

Possibly an array out of bounds error.

What does it do on an actual Pokitto?

Half the reason I use PokittoIO is so I don’t have to download the 20-odd examples.

I get an undefined reference to Pokitto::Display::scanType,
which means either scanType is something you’ve added locally or it’s in PokittoLib and not PokittoIO.


I also get a list of warnings:
Problems

The “array subscript is above array bounds” warnings are the most serious ones.


#184

It’s in PokittoLib. It was added a while back with the mixmode example.


#185

This seems to be working perfectly, a little faster than the original.
Do you think moving the palette shifting to the loadpalette functions would make any difference? I don’t see a need to do that shift on every single frame.

// Forward declaration of the mixed-mode refresh worker so that Pokitto::lcdRefreshMixMode
// can call it before its definition. It takes the frame buffer, the scan-type table,
// the 8bpp palette, and fixed-size 4bpp (16-entry) and 2bpp (4-entry) palettes.
void lcdRefreshMixModeImplementation(const uint8_t screenBuffer[], const uint8_t scanTypes[], const uint16_t palette8bpp[], const uint32_t (&palette4bpp)[16], const uint32_t (&palette2bpp)[4]);

// Refreshes the LCD in mixed mode from a single combined 16-bit palette.
//
// The combined palette is read as 256 8bpp colours, followed by 16 4bpp colours,
// followed by 4 2bpp colours. The 4bpp and 2bpp colours are copied into local
// tables shifted left by 3; the 8bpp part is handed over untouched and the
// implementation shifts it while decoding.
void Pokitto::lcdRefreshMixMode(const uint8_t * screenBuffer, const uint16_t * palette, const uint8_t * scanTypes)
{
	constexpr size_t entries8bpp = 256;
	constexpr size_t entries4bpp = 16;
	constexpr size_t entries2bpp = 4;

	// Start index of each sub-palette inside the combined palette
	constexpr size_t start4bpp = entries8bpp;
	constexpr size_t start2bpp = start4bpp + entries4bpp;

	uint32_t shifted4bpp[entries4bpp];
	uint32_t shifted2bpp[entries2bpp];

	for(size_t entry = 0; entry < entries4bpp; ++entry)
		shifted4bpp[entry] = static_cast<uint32_t>(palette[start4bpp + entry]) << 3;

	for(size_t entry = 0; entry < entries2bpp; ++entry)
		shifted2bpp[entry] = static_cast<uint32_t>(palette[start2bpp + entry]) << 3;

	lcdRefreshMixModeImplementation(screenBuffer, scanTypes, palette, shifted4bpp, shifted2bpp);
}

// Decodes a mixed-mode frame buffer and streams it to the LCD as 176 display
// lines of 220 colours each.
//
// screenBuffer : packed pixel data. An 8bpp line re-seeks to 110 * (y >> 1) and
//                reads 110 bytes; 4bpp and 2bpp lines read 55 bytes each and
//                continue from wherever the read cursor currently is.
// scanTypes    : one entry per pair of display lines, indexed by (y >> 1):
//                0 = 8bpp, 1 = 4bpp, 2 = 2bpp. Any other value decodes nothing,
//                so the previous contents of the scanline are sent again.
// palette8bpp  : 16-bit colours for 8bpp lines; shifted left by 3 while decoding.
// palette4bpp  : 16 colours for 4bpp lines, written to the LCD as-is.
// palette2bpp  : 4 colours for 2bpp lines, written to the LCD as-is.
void lcdRefreshMixModeImplementation(const uint8_t screenBuffer[], const uint8_t scanTypes[], const uint16_t palette8bpp[], const uint32_t (&palette4bpp)[16], const uint32_t (&palette2bpp)[4])
{
	write_command(0x03);
	write_data(0x1038);

	// Horizontal DRAM Address
	write_command(0x20);
	write_data(0);

	// Vertical DRAM Address
	write_command(0x21);
	write_data(0);

	// write data to DRAM
	write_command(0x22);
	CLR_CS_SET_CD_RD_WR;
	SET_MASK_P2;

	// One display line holds 220 colours. The write-out below fetches the next
	// colour right after sending the current one, so its 220th step reads index
	// 220. The extra slot keeps that read inside the array, and zero-initialising
	// the buffer means no indeterminate value is ever read.
	uint32_t scanline[220 + 1] = {};

	// Read cursor into the packed pixel data
	const uint8_t * d = screenBuffer;

	for(size_t y = 0; y < 176; ++y)
	{
		// Decode the colours of one display line into scanline[]
		const uint8_t scanTypeIndex = (y >> 1);
		uint16_t scanlineIndex = 0;
		switch(scanTypes[scanTypeIndex])
		{
			case 0: // 8bpp
			{
				// Point to the beginning of the line in data; both display lines
				// of a pair therefore decode the same 110 bytes.
				d = &screenBuffer[110 * scanTypeIndex];

				// One byte -> one colour, emitted twice (horizontal doubling)
				#define STEP() \
					{ \
						const uint32_t color = static_cast<uint32_t>(palette8bpp[*d++]) << 3; \
						scanline[scanlineIndex++] = color; \
						scanline[scanlineIndex++] = color; \
					}

				// 11 * 10 bytes = 110 bytes = 220 colours
				for(uint8_t x = 0; x < 11; ++x)
				{
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
				}

				#undef STEP

				break;
			}
			case 1: // 4bpp
			{
				// One byte -> two colours (high nibble first), each emitted twice
				#define STEP() \
					{ \
						const uint8_t value = *d++; \
						const uint32_t colorHigh = palette4bpp[((value >> 4) & 0x0F)]; \
						const uint32_t colorLow = palette4bpp[((value >> 0) & 0x0F)]; \
						scanline[scanlineIndex++] = colorHigh; \
						scanline[scanlineIndex++] = colorHigh; \
						scanline[scanlineIndex++] = colorLow; \
						scanline[scanlineIndex++] = colorLow; \
					}

				// 11 * 5 bytes = 55 bytes = 220 colours
				for(uint8_t x = 0; x < 11; ++x)
				{
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
				}

				#undef STEP

				break;
			}
			case 2: // 2bpp
			{
				// One byte -> four colours, most significant bit pair first
				#define STEP() \
					{ \
						const uint8_t value = *d++; \
						const uint32_t color1 = palette2bpp[((value >> 6) & 0x03)]; \
						const uint32_t color2 = palette2bpp[((value >> 4) & 0x03)]; \
						const uint32_t color3 = palette2bpp[((value >> 2) & 0x03)]; \
						const uint32_t color4 = palette2bpp[((value >> 0) & 0x03)]; \
						scanline[scanlineIndex++] = color1; \
						scanline[scanlineIndex++] = color2; \
						scanline[scanlineIndex++] = color3; \
						scanline[scanlineIndex++] = color4; \
					}

				// 11 * 5 bytes = 55 bytes = 220 colours
				for(uint8_t x = 0; x < 11; ++x)
				{
					STEP();
					STEP();
					STEP();
					STEP();
					STEP();
				}

				#undef STEP

				break;
			}
		}

		// Send the current colour, then fetch the next one inside TGL_WR_OP
        #define WRITE_SCANLINE() \
            *LCD = color;\
             TGL_WR_OP(color = scanline[++scanlineIndex]);

		#define WRITE_SCANLINE_2() \
			WRITE_SCANLINE(); \
			WRITE_SCANLINE();

		#define WRITE_SCANLINE_10() \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2();

		#define WRITE_SCANLINE_20() \
			WRITE_SCANLINE_10(); \
			WRITE_SCANLINE_10();

		{
			volatile uint32_t * LCD = reinterpret_cast< volatile uint32_t * >(0xA0002188);
			uint32_t color = scanline[0];
			// Deliberately shadows the decode index above: the write-out restarts at 0
			size_t scanlineIndex = 0;

			// 11 * 20 = 220 colours written per display line
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
		}

		#undef WRITE_SCANLINE_20
		#undef WRITE_SCANLINE_10
		#undef WRITE_SCANLINE_2
		#undef WRITE_SCANLINE
	}

	CLR_MASK_P2;
}

#186

added this to PokittoDisplay.h

// Combined, pre-shifted mixed-mode palette: 256 + 16 + 4 = 276 entries.
// The 8bpp colours occupy the first 256 slots, load4bitPalette fills
// indices 256-271 and load2bitPalette fills indices 272-275.
static uint32_t mixPalette[276];

then load my palette pre-shifted:

// Loads the 256-colour (8bpp) palette into the first 256 slots of the mixed-mode
// palette, storing every colour already shifted left by 3, and points
// mixpaletteptr at the combined table.
// p must point to at least 256 colours.
void load8bitPalette(const uint16_t* p) {
    // Exactly 256 entries (0-255): index 256 is the first 4bpp slot, so going one
    // further would both read past a 256-colour source and clobber that slot.
    for (int i=0; i<256; i++) game.display.mixPalette[i] = static_cast<uint32_t>(p[i])<<3;
    game.display.mixpaletteptr = game.display.mixPalette;
}

// Loads the 16-colour (4bpp) palette into slots 256-271 of the mixed-mode
// palette, storing every colour already shifted left by 3, and points
// mixpaletteptr at the combined table.
void load4bitPalette(const uint16_t* p) {
    uint32_t* const slots = &game.display.mixPalette[256];
    for (int entry = 0; entry < 16; ++entry) {
        slots[entry] = p[entry] << 3;
    }
    game.display.mixpaletteptr = game.display.mixPalette;
}

// Loads the 4-colour (2bpp) palette into slots 272-275 of the mixed-mode
// palette, storing every colour already shifted left by 3, and points
// mixpaletteptr at the combined table.
void load2bitPalette(const uint16_t* p) {
    uint32_t* const slots = &game.display.mixPalette[272];
    for (int entry = 0; entry < 4; ++entry) {
        slots[entry] = p[entry] << 3;
    }
    game.display.mixpaletteptr = game.display.mixPalette;
}

and remove the palette shifting from the display update…

// Decodes a mixed-mode frame buffer and streams it to the LCD as 176 display
// lines of 220 colours each, using a palette whose colours are written to the
// LCD without any further shifting.
//
// screenBuffer : packed pixel data. An 8bpp line re-seeks to 110 * (y >> 1) and
//                reads 110 bytes; 4bpp and 2bpp lines read 55 bytes each and
//                continue from wherever the read cursor currently is.
// palette8bpp  : combined palette: 8bpp colours at indices 0-255, 4bpp colours
//                at 256-271 and 2bpp colours at 272-275.
// scanTypes    : one entry per pair of display lines, indexed by (y >> 1):
//                0 = 8bpp, 1 = 4bpp, 2 = 2bpp. Any other value decodes nothing,
//                so the previous contents of the scanline are sent again.
void Pokitto::lcdRefreshMixMode(const uint8_t * screenBuffer, const uint32_t * palette8bpp, const uint8_t * scanTypes)
{
	write_command(0x03);
	write_data(0x1038);

	// Horizontal DRAM Address
	write_command(0x20);
	write_data(0);

	// Vertical DRAM Address
	write_command(0x21);
	write_data(0);

	// write data to DRAM
	write_command(0x22);
	CLR_CS_SET_CD_RD_WR;
	SET_MASK_P2;

	// One display line holds 220 colours. The write-out below fetches the next
	// colour right after sending the current one, so its 220th step reads index
	// 220. The extra slot keeps that read inside the array, and zero-initialising
	// the buffer means no indeterminate value is ever read.
	uint32_t scanline[220 + 1] = {};

	// Read cursor into the packed pixel data
	const uint8_t * d = screenBuffer;

	for(size_t y = 0; y < 176; ++y)
	{
		// Decode the colours of one display line into scanline[]
		const uint8_t scanTypeIndex = (y >> 1);
		uint16_t scanlineIndex = 0;
		switch(scanTypes[scanTypeIndex])
		{
			case 0: // 8bpp
			{
				// Point to the beginning of the line in data; both display lines
				// of a pair therefore decode the same 110 bytes.
				d = &screenBuffer[110 * scanTypeIndex];

				// One byte -> one colour, emitted twice (horizontal doubling)
				#define STEP() \
					{ \
						const uint32_t color = palette8bpp[*d++]; \
						scanline[scanlineIndex++] = color; \
						scanline[scanlineIndex++] = color; \
					}

                #define STEP11()\
                {\
					STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();\
                }

				// 10 * 11 bytes = 110 bytes = 220 colours
				STEP11();STEP11();STEP11();STEP11();STEP11();STEP11();STEP11();STEP11();STEP11();STEP11();

				#undef STEP11
				#undef STEP

				break;
			}
			case 1: // 4bpp
			{
				// One byte -> two colours (high nibble first), each emitted twice.
				// The 4bpp colours start at index 256 of the combined palette.
				#define STEP() \
					{ \
						const uint8_t value = *d++; \
						const uint32_t color1 = palette8bpp[((value >> 4) & 0x0F)+256]; \
						const uint32_t color2 = palette8bpp[((value >> 0) & 0x0F)+256]; \
						scanline[scanlineIndex++] = color1; \
						scanline[scanlineIndex++] = color1; \
						scanline[scanlineIndex++] = color2; \
						scanline[scanlineIndex++] = color2; \
					}

                #define STEP11()\
                {\
					STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();\
                }

				// 5 * 11 bytes = 55 bytes = 220 colours
				STEP11();STEP11();STEP11();STEP11();STEP11();

				#undef STEP11
				#undef STEP

				break;
			}
			case 2: // 2bpp
			{
				// One byte -> four colours, most significant bit pair first.
				// The 2bpp colours start at index 272 of the combined palette.
				#define STEP() \
					{ \
						const uint8_t value = *d++; \
						const uint32_t color1 = palette8bpp[((value >> 6) & 0x03)+272]; \
						const uint32_t color2 = palette8bpp[((value >> 4) & 0x03)+272]; \
						const uint32_t color3 = palette8bpp[((value >> 2) & 0x03)+272]; \
						const uint32_t color4 = palette8bpp[((value >> 0) & 0x03)+272]; \
						scanline[scanlineIndex++] = color1; \
						scanline[scanlineIndex++] = color2; \
						scanline[scanlineIndex++] = color3; \
						scanline[scanlineIndex++] = color4; \
					}

                #define STEP11()\
                {\
					STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();	STEP();\
                }

				// 5 * 11 bytes = 55 bytes = 220 colours
				STEP11();STEP11();STEP11();STEP11();STEP11();

				#undef STEP11
				#undef STEP

				break;
			}
		}

		// Send the current colour, then fetch the next one inside TGL_WR_OP
        #define WRITE_SCANLINE() \
            *LCD = color;\
             TGL_WR_OP(color = scanline[++scanlineIndex]);

		#define WRITE_SCANLINE_2() \
			WRITE_SCANLINE(); \
			WRITE_SCANLINE();

		#define WRITE_SCANLINE_10() \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2(); \
			WRITE_SCANLINE_2();

		#define WRITE_SCANLINE_20() \
			WRITE_SCANLINE_10(); \
			WRITE_SCANLINE_10();

		{
			volatile uint32_t * LCD = reinterpret_cast< volatile uint32_t * >(0xA0002188);
			uint32_t color = scanline[0];
			// Deliberately shadows the decode index above: the write-out restarts at 0
			size_t scanlineIndex = 0;

			// 11 * 20 = 220 colours written per display line
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
			WRITE_SCANLINE_20();
		}

		#undef WRITE_SCANLINE_20
		#undef WRITE_SCANLINE_10
		#undef WRITE_SCANLINE_2
		#undef WRITE_SCANLINE
	}

	CLR_MASK_P2;
}

I can’t help thinking that perhaps displaying more than one line at a time might be faster, especially as I’m splitting the screen into 88 lines for mode switching. However my attempts have resulted in garbled data.
Does anyone think it would help?