Skip to content

Instantly share code, notes, and snippets.

@DedeHai
Last active December 31, 2025 07:55
Show Gist options
  • Select an option

  • Save DedeHai/8b882c232a9b2cf1ef6a6c69d16d0f4c to your computer and use it in GitHub Desktop.

Select an option

Save DedeHai/8b882c232a9b2cf1ef6a6c69d16d0f4c to your computer and use it in GitHub Desktop.
/*
Low level driver test to generate 4-lane outputs for LED data (WS281x) using 4-step-cadence to encode the bit signals
This is the result of many hours of trial and error - there is little to no documentation and whatever I found in online examples did not properly work for the C3 but finally figured out a way that works.
Took some inspiration from an M2M DMA transfer example here: https://esp32.com/viewtopic.php?t=39242
Also found something very similar with a similar journey (welcome to hell ;) ): https://github.com/vladkorotnev/plasma-clock/blob/main/src/display/akizuki_k875.cpp - this may also be a legit way, i.e. using the high level API for init and then modifying the result; it could be less brittle across SDK versions, but I could not get it to work that way.
This code works in Arduino IDE 2.3.4 using "esp32 by Espressif Systems v. 3.3.5"
To anyone stumbling on this piece of code: feel free to put it to whatever use you want, no licensing restrictions.
If you find it useful to you, please feel free to buy me a beer or two :) paypal.me/dedehai
*/
#include "hal/spi_ll.h"
#include "soc/gdma_struct.h"
#include "hal/gdma_ll.h"
#include "soc/gdma_reg.h"
#include "esp_heap_caps.h"
//#include "driver/spi_master.h" // high level spi API (needed`?)
#include <Arduino.h>
// Constants
#define BUF_SIZE 1024        // size in bytes of each of the two DMA buffers
#define SPI_HOST SPI2_HOST
#define GDMA_CHANNEL 0       // GDMA channel index used for the TX path and its interrupt registers
// DMA Buffers and Descriptors
uint8_t *buffer0, *buffer1;  // the two buffers that are alternately refilled by EncodeChunk()
lldesc_t dma_desc[2];        // one descriptor per buffer, linked into a ring in setup()
spi_dev_t *hw = &GPSPI2;
// GPIOs the four SPI data signals are routed to (one LED lane each)
const int PIN_DAT0 = 0;
const int PIN_DAT1 = 1;
const int PIN_DAT2 = 2;
const int PIN_DAT3 = 3;
// Interrupt tracking
volatile uint8_t currentBuffer = 0;       // index of the buffer the ISR refills on the next EOF interrupt
volatile bool buffer0NeedsRefill = false; // currently unused (refill is done directly in the ISR)
volatile bool buffer1NeedsRefill = false; // currently unused (refill is done directly in the ISR)
// LED encoding constants
static constexpr uint16_t zeroBitPattern = 0x0001; // note: with 4 data lanes, each lane is one bit of a nibble so one byte encodes two clock cycles, 2 bytes encode one 4-step output bit.
static constexpr uint16_t oneBitPattern = 0x0011; // note: NPB uses 0x0111, fastled uses 0x0011. using 0x0011 and 0x0001 and slowing down the clock may yield better results and less flickering but that remains to be tested
// "LED data"
uint8_t* _laneData[4];   // source bytes per lane, allocated and filled with test data in setup()
size_t _numbytes = 500;  // 1 byte needs 16 bytes of buffer in 4-step mode (2 bytes per bit)
size_t resetbits = 5000; // extra bits appended to the transfer length after the encoded data
// Total SPI transfer length in bits: 16 buffer bytes per source byte times 8 bits, plus the reset bits
// (independent of number of channels, unused channels get all zeroes).
// note: max is 18bits or 262143 bits or 32kbytes or 2047 unencoded bytes or 682 RGB LEDs
// The whole expansion is parenthesised so the macro is safe inside larger expressions (e.g. TOTAL_BITS / 8).
#define TOTAL_BITS ((_numbytes*16*8) + resetbits)
size_t _framePos = 0;            // index of the next source byte to encode within the current frame
volatile bool issending = false; // true while a frame is being encoded; cleared by EncodeChunk() from the DMA ISR and polled in loop(), hence volatile
// Refill one DMA buffer with the next slice of encoded LED data.
// bufIdx: 0 selects buffer0, any other value selects buffer1.
// Each source byte becomes 16 buffer bytes: one uint16_t per bit, and the four lanes
// share that word by shifting the bit pattern left by the lane number.
// Called from setup()/loop() to prime both buffers and from the GDMA ISR afterwards.
void IRAM_ATTR EncodeChunk(uint8_t bufIdx)
{
  uint8_t* const target = (bufIdx != 0) ? buffer1 : buffer0;

  // Always start from an all-zero buffer. Once the frame is done this also leaves
  // zeroes behind, so any additional clocks just output the "reset pulse" for the LEDs.
  memset(target, 0x00, BUF_SIZE);
  if (!issending) {
    return;
  }

  const size_t chunkCapacity = BUF_SIZE / 16; // 16 DMA bytes per source byte
  const size_t remaining = (_framePos >= _numbytes) ? 0 : (_numbytes - _framePos);
  const size_t count = (remaining > chunkCapacity) ? chunkCapacity : remaining;

  if (count == 0) {
    // Nothing left to encode: mark the frame as finished and rewind for the next one.
    issending = false;
    _framePos = 0;
    return;
  }

  for (uint8_t lane = 0; lane < 4; ++lane) {
    const uint16_t lowPattern = zeroBitPattern << lane;
    const uint16_t highPattern = oneBitPattern << lane;
    const uint8_t* in = _laneData[lane] + _framePos;
    uint16_t* out = reinterpret_cast<uint16_t*>(target);

    // The eight stores are deliberately written out instead of looped: the author measured
    // the unrolled form at 74us versus 125us for a bit loop in an arbitrary test.
    // Author's note: not tested on real LEDs; the pattern looks legit but MSB/LSB may be flipped.
    for (size_t n = 0; n < count; ++n, out += 8) {
      const uint8_t value = in[n];
      out[0] |= (value & 0x80) ? highPattern : lowPattern; // WS281x LEDs use MSB first
      out[1] |= (value & 0x40) ? highPattern : lowPattern;
      out[2] |= (value & 0x20) ? highPattern : lowPattern;
      out[3] |= (value & 0x10) ? highPattern : lowPattern;
      out[4] |= (value & 0x08) ? highPattern : lowPattern;
      out[5] |= (value & 0x04) ? highPattern : lowPattern;
      out[6] |= (value & 0x02) ? highPattern : lowPattern;
      out[7] |= (value & 0x01) ? highPattern : lowPattern;
    }
  }

  _framePos += count;
}
// GDMA interrupt service routine.
// On an out_eof interrupt the buffer indexed by currentBuffer is re-encoded and
// currentBuffer flips to the other buffer. arg is unused.
// Debug outputs: GPIO10 is raised while an EOF is handled and lowered on exit,
// GPIO21 is set high when buffer 0 is refilled and low when buffer 1 is refilled.
void IRAM_ATTR gdma_isr_handler(void* arg)
{
  gdma_dev_t* const gdma = &GDMA;

  const bool eofPending = gdma->intr[GDMA_CHANNEL].st.out_eof;
  if (eofPending) {
    digitalWrite(10, HIGH);
    const bool refillFirst = (currentBuffer == 0);
    digitalWrite(21, refillFirst ? HIGH : LOW);
    EncodeChunk(refillFirst ? 0 : 1);
    currentBuffer = refillFirst ? 1 : 0;
  }

  // Acknowledge every interrupt bit that is currently set in this channel's status register.
  gdma->intr[GDMA_CHANNEL].clr.val = gdma->intr[GDMA_CHANNEL].st.val;
  digitalWrite(10, LOW);
}
// Configure the GPIOs used by the sketch and route the SPI2 (FSPI) output signals to them.
void setupSPIPins() {
  // The four data lanes, followed by the two debug pins (10 and 21).
  const int outputPins[] = {PIN_DAT0, PIN_DAT1, PIN_DAT2, PIN_DAT3, 10, 21};
  for (const int pin : outputPins) {
    pinMode(pin, OUTPUT);
  }

  // The SPI clock has to be routed somewhere: per the author, if this is skipped no real data
  // is output. GPIO11 is dedicated to powering the internal flash (VDD_SPI) but still works for
  // this "dummy routing". Author's caveat: using GPIO11 may be a bad idea; no side effects were
  // observed and the pin voltage seemed unaffected, but this is not certain.
  pinMatrixOutAttach(11, FSPICLK_OUT_IDX, false, false);

  // One SPI data signal per LED lane.
  struct LaneRoute {
    int pin;
    int signal;
  };
  const LaneRoute routes[] = {
    {PIN_DAT0, FSPID_OUT_IDX},
    {PIN_DAT1, FSPIQ_OUT_IDX},
    {PIN_DAT2, FSPIWP_OUT_IDX},
    {PIN_DAT3, FSPIHD_OUT_IDX},
  };
  for (const LaneRoute& route : routes) {
    pinMatrixOutAttach(route.pin, route.signal, false, false);
  }
}
void setup() {
Serial.begin(115200);
// Allocate DMA-capable memory
buffer0 = (uint8_t*)heap_caps_aligned_alloc(4, BUF_SIZE, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
buffer1 = (uint8_t*)heap_caps_aligned_alloc(4, BUF_SIZE, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL);
for (int i = 0; i < 4; i++) {
_laneData[i] = (uint8_t*)heap_caps_malloc(_numbytes, MALLOC_CAP_INTERNAL);
for(int j = 0; j < _numbytes; j++) {
_laneData[i][j] = j;
}
}
// Initialize with test patterns
for (int j = 0; j < BUF_SIZE; j+=4) {
buffer0[j] = 0xF7;
buffer0[j+1] = 0x31;
buffer0[j+2] = 0x00;
buffer0[j+3] = 0x00;
}
for (int j = 0; j < BUF_SIZE; j+=4) {
buffer1[j] = 0xF0;
buffer1[j+1] = 0x00;
buffer1[j+2] = 0xF0;
buffer1[j+3] = 0x00;
}
// Setup DMA Descriptors for Circular Transfer
dma_desc[0].size = BUF_SIZE;
dma_desc[0].length = BUF_SIZE;
dma_desc[0].owner = 1;
dma_desc[0].sosf = 0;
dma_desc[0].eof = 1; // Enable EOF interrupt
dma_desc[0].buf = buffer0;
dma_desc[0].qe.stqe_next = &dma_desc[1];
dma_desc[1].size = BUF_SIZE;
dma_desc[1].length = BUF_SIZE;
dma_desc[1].owner = 1;
dma_desc[1].sosf = 0;
dma_desc[1].eof = 1; // Enable EOF interrupt
dma_desc[1].buf = buffer1;
dma_desc[1].qe.stqe_next = &dma_desc[0]; // point back to the first buffer
// Enable peripheral clocks
SYSTEM.perip_clk_en0.reg_spi2_clk_en = 1; // note: this is a hack as this needs gating, its a shared register!!! cant be bothered right now for this test
SYSTEM.perip_rst_en0.reg_spi2_rst = 1;
SYSTEM.perip_rst_en0.reg_spi2_rst = 0;
SYSTEM.perip_clk_en1.reg_dma_clk_en = 1;
SYSTEM.perip_rst_en1.reg_dma_rst = 1;
SYSTEM.perip_rst_en1.reg_dma_rst = 0;
// Configure SPI2
spi_ll_master_init(hw);
spi_ll_master_set_mode(hw, 0);
spi_ll_set_tx_lsbfirst(hw, true); // send out LSB first instead of MSB first
spi_line_mode_t linemode = {};
linemode.data_lines = 4;
spi_ll_master_set_line_mode(hw, linemode);
spi_ll_master_set_clock(&GPSPI2, 80000000, 2600000, 128); //2.7MHz -> 360ns per bit, 0-> 360ns, 1-> 720ns, total: 1440ns (slow, safe mode), min is 2.5MHz for 400ns pulse. at 2.6MHz the measured pulse width is 390ns and 780s so pretty much a sweet spot
setupSPIPins();
spi_ll_set_mosi_bitlen(hw, TOTAL_BITS);
spi_ll_enable_mosi(hw, true);
spi_ll_dma_tx_enable(hw, true);
spi_ll_dma_tx_fifo_reset(hw);
spi_ll_outfifo_empty_clr(hw);
spi_ll_apply_config(hw);
// Configure GDMA
gdma_dev_t *dma = &GDMA;
gdma_ll_tx_reset_channel(dma, GDMA_CHANNEL);
gdma_ll_tx_connect_to_periph(dma, GDMA_CHANNEL, GDMA_TRIG_PERIPH_SPI, 0);
gdma_ll_tx_set_desc_addr(dma, GDMA_CHANNEL, (uint32_t)&dma_desc[0]);
gdma_ll_tx_start(dma, GDMA_CHANNEL);
// Enable EOF interrupt
dma->intr[GDMA_CHANNEL].ena.out_eof = 1;
dma->intr[GDMA_CHANNEL].clr.out_eof = 1; // clear eof interrupt flag
// Attach interrupt handler note: when I cleared all interrupts here, the DMA would not start...
esp_intr_alloc(ETS_DMA_CH0_INTR_SOURCE, ESP_INTR_FLAG_IRAM, gdma_isr_handler, NULL, NULL);
issending = true;
EncodeChunk(0);
EncodeChunk(1);
Serial.println("SPI DMA with interrupts start");
// Start SPI Transfer
spi_ll_user_start(hw);
}
void loop() {
if(issending) {
delay(1);
}
else {
// digitalWrite(21, LOW);
delay(5);
Serial.println("*");
while(!spi_ll_usr_is_done(hw)); // wait for finish transfer (TODO: this does not seem to hold up...)
// reset SPI so it continues triggering the DMA
spi_ll_dma_tx_fifo_reset(hw);
spi_ll_outfifo_empty_clr(hw);
spi_ll_apply_config(hw);
// reset DMA so it starts a fresh frame
gdma_dev_t *dma = &GDMA;
gdma_ll_tx_reset_channel(dma, GDMA_CHANNEL);
gdma_ll_tx_connect_to_periph(dma, GDMA_CHANNEL, GDMA_TRIG_PERIPH_SPI, 0);
gdma_ll_tx_set_desc_addr(dma, GDMA_CHANNEL, (uint32_t)&dma_desc[0]);
gdma_ll_tx_start(dma, GDMA_CHANNEL);
issending = true;
currentBuffer = 0;
EncodeChunk(0);
EncodeChunk(1);
//gdma_ll_tx_restart(dma, GDMA_CHANNEL); // -> does not reset the dma buffer pointers
spi_ll_user_start(hw);
//buffer0NeedsRefill = false;
//buffer1NeedsRefill = false;
}
// delay(200);
//Serial.print(".");
//spi_ll_user_start(hw);
//while(!spi_ll_usr_is_done(hw)); // wait for finish transfer
//delay(1);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment