Last active
December 31, 2025 07:55
-
-
Save DedeHai/8b882c232a9b2cf1ef6a6c69d16d0f4c to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Low level driver test to generate 4-lane outputs for LED data (WS281x) using 4-step-cadence to encode the bit signals | |
| This is the result of many hours of trial and error - there is little to no documentation and whatever I found in online examples did not properly work for the C3 but finally figured out a way that works. | |
| Took some inspiration from an M2M DMA transfer example here: https://esp32.com/viewtopic.php?t=39242 | |
| Also found something very similar, with a similar journey (welcome to hell ;) ): https://github.com/vladkorotnev/plasma-clock/blob/main/src/display/akizuki_k875.cpp - this may also be a legit way, i.e. using the high-level API for init and then modifying it; that could be less brittle across SDK versions, but I could not get it to work that way. | |
| This code works in Arduino IDE 2.3.4 using "esp32 by Espressif Systems v. 3.3.5" | |
| To anyone stumbling on this piece of code: feel free to put it to whatever use you want, no licensing restrictions. | |
| If you find it useful to you, please feel free to buy me a beer or two :) paypal.me/dedehai | |
| */ | |
| #include "hal/spi_ll.h" | |
| #include "soc/gdma_struct.h" | |
| #include "hal/gdma_ll.h" | |
| #include "soc/gdma_reg.h" | |
| #include "esp_heap_caps.h" | |
| //#include "driver/spi_master.h" // high level spi API (needed`?) | |
| #include <Arduino.h> | |
| // Constants | |
| #define BUF_SIZE 1024 | |
| #define SPI_HOST SPI2_HOST | |
| #define GDMA_CHANNEL 0 | |
| // DMA Buffers and Descriptors | |
| uint8_t *buffer0, *buffer1; | |
| lldesc_t dma_desc[2]; | |
| spi_dev_t *hw = &GPSPI2; | |
| const int PIN_DAT0 = 0; | |
| const int PIN_DAT1 = 1; | |
| const int PIN_DAT2 = 2; | |
| const int PIN_DAT3 = 3; | |
| // Interrupt tracking | |
| volatile uint8_t currentBuffer = 0; | |
| volatile bool buffer0NeedsRefill = false; | |
| volatile bool buffer1NeedsRefill = false; | |
// LED encoding constants
// note: with 4 data lanes, each lane is one bit of a nibble, so one byte encodes two clock
// cycles and 2 bytes encode one 4-step output bit.
static constexpr uint16_t zeroBitPattern = 0x0001;
// note: NPB uses 0x0111, fastled uses 0x0011. Using 0x0011 and 0x0001 and slowing down the clock
// may yield better results and less flickering, but that remains to be tested.
static constexpr uint16_t oneBitPattern = 0x0011;

// "LED data"
uint8_t* _laneData[4];     // one source byte array per lane
size_t _numbytes = 500;    // 1 byte needs 16 bytes of buffer in 4-step mode (2 bytes per bit)
size_t resetbits = 5000;   // extra bits appended after the data (added to TOTAL_BITS)

// Total SPI transfer length in bits: 16 bytes encode one source byte, times 8 bits per byte
// (independent of the number of channels, unused channels get all zeroes), plus the trailing
// reset bits.
// note: max is 18 bits or 262143 bits or 32 kbytes or 2047 unencoded bytes or 682 RGB LEDs.
// The whole expression is parenthesized so the macro stays correct inside larger expressions
// (e.g. `TOTAL_BITS * 2` or `TOTAL_BITS / 8`).
#define TOTAL_BITS ((_numbytes * 16 * 8) + resetbits)

size_t _framePos = 0;             // index of the next source byte to encode
// Written by the encoder (which also runs in the DMA interrupt) and polled from loop(), so it
// must be volatile to keep the compiler from caching it across the two contexts.
volatile bool issending = false;
| void IRAM_ATTR EncodeChunk(uint8_t bufIdx) | |
| { | |
| // digitalWrite(21, HIGH); | |
| uint8_t* dst = (bufIdx == 0) ? buffer0 : buffer1; | |
| // if(bufIdx) | |
| // memset(dst, 0xF0, BUF_SIZE); // test pattern | |
| // else | |
| memset(dst, 0x00, BUF_SIZE); // clear buffer, this also sets buffers to zero once sending is done so all additional clocks just output the "reset pulse" for leds. | |
| if(!issending) return; | |
| size_t maxSrcThisChunk = BUF_SIZE / 16; // 16 DMA bytes per source byte | |
| size_t srcBytesLeft = (_framePos < _numbytes) ? (_numbytes - _framePos) : 0; | |
| size_t srcThisChunk = (srcBytesLeft < maxSrcThisChunk) ? srcBytesLeft : maxSrcThisChunk; | |
| if (srcThisChunk == 0) { | |
| // done sending | |
| issending = false; | |
| _framePos = 0; // reset | |
| //digitalWrite(21, LOW); | |
| return; | |
| } | |
| for (uint8_t lane = 0; lane < 4; lane++) { | |
| const uint16_t zerobit = zeroBitPattern << lane; | |
| const uint16_t onebit = oneBitPattern << lane; | |
| const uint8_t* src = _laneData[lane]; | |
| uint16_t* pOut = reinterpret_cast<uint16_t*>(dst); | |
| for (size_t i = 0; i < srcThisChunk; i++) { | |
| uint8_t v = src[_framePos + i]; | |
| *pOut++ |= (v & 0x80) ? onebit : zerobit; // WS281x LEDs use MSB first | |
| *pOut++ |= (v & 0x40) ? onebit : zerobit; // note: unrolling the for-loop like this almost doubled the execution speed of this function! (74us instead of 125us in an arbitrary test) | |
| *pOut++ |= (v & 0x20) ? onebit : zerobit; // note2: I did not test this on real LEDs, the signal encoding may not be 100% correct but the pattern looks legit (MSB/LSB may be flipped) | |
| *pOut++ |= (v & 0x10) ? onebit : zerobit; | |
| *pOut++ |= (v & 0x08) ? onebit : zerobit; | |
| *pOut++ |= (v & 0x04) ? onebit : zerobit; | |
| *pOut++ |= (v & 0x02) ? onebit : zerobit; | |
| *pOut++ |= (v & 0x01) ? onebit : zerobit; | |
| } | |
| } | |
| _framePos += srcThisChunk; | |
| // digitalWrite(21, LOW); | |
| } | |
| // DMA interrupt handler | |
| void IRAM_ATTR gdma_isr_handler(void* arg) | |
| { | |
| //digitalWrite(10, HIGH); | |
| gdma_dev_t *dma = &GDMA; | |
| // Check which descriptor completed (EOF interrupt) | |
| if (dma->intr[GDMA_CHANNEL].st.out_eof) { // EOF interrupt | |
| digitalWrite(10, HIGH); | |
| if (currentBuffer == 0) { | |
| digitalWrite(21, HIGH); | |
| // Buffer 0 just finished | |
| //buffer0NeedsRefill = true; | |
| EncodeChunk(0); | |
| currentBuffer = 1; | |
| } else { | |
| digitalWrite(21, LOW); | |
| // Buffer 1 just finished | |
| //buffer1NeedsRefill = true; | |
| EncodeChunk(1); | |
| currentBuffer = 0; | |
| } | |
| } | |
| // Clear interrupts | |
| dma->intr[GDMA_CHANNEL].clr.val = dma->intr[GDMA_CHANNEL].st.val; | |
| digitalWrite(10, LOW); | |
| } | |
| void setupSPIPins() { | |
| pinMode(PIN_DAT0, OUTPUT); | |
| pinMode(PIN_DAT1, OUTPUT); | |
| pinMode(PIN_DAT2, OUTPUT); | |
| pinMode(PIN_DAT3, OUTPUT); | |
| pinMode(10, OUTPUT); // debug | |
| pinMode(21, OUTPUT); // debug | |
| // Route SPI signals | |
| pinMatrixOutAttach(11, FSPICLK_OUT_IDX, false, false); // spi clk is needed to clock the dma it seems, if this line is skipped, no real data is output, GPIO11 is dedicated to power internal flash VDD_SPI, however, it still works for "dummy routing" | |
| // using gpio11 may be a bad idea... I saw no side effects in doing so, the pin voltage seems unaffected by this routing, not sure | |
| pinMatrixOutAttach(PIN_DAT0, FSPID_OUT_IDX, false, false); | |
| pinMatrixOutAttach(PIN_DAT1, FSPIQ_OUT_IDX, false, false); | |
| pinMatrixOutAttach(PIN_DAT2, FSPIWP_OUT_IDX, false, false); | |
| pinMatrixOutAttach(PIN_DAT3, FSPIHD_OUT_IDX, false, false); | |
| } | |
| void setup() { | |
| Serial.begin(115200); | |
| // Allocate DMA-capable memory | |
| buffer0 = (uint8_t*)heap_caps_aligned_alloc(4, BUF_SIZE, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); | |
| buffer1 = (uint8_t*)heap_caps_aligned_alloc(4, BUF_SIZE, MALLOC_CAP_DMA | MALLOC_CAP_INTERNAL); | |
| for (int i = 0; i < 4; i++) { | |
| _laneData[i] = (uint8_t*)heap_caps_malloc(_numbytes, MALLOC_CAP_INTERNAL); | |
| for(int j = 0; j < _numbytes; j++) { | |
| _laneData[i][j] = j; | |
| } | |
| } | |
| // Initialize with test patterns | |
| for (int j = 0; j < BUF_SIZE; j+=4) { | |
| buffer0[j] = 0xF7; | |
| buffer0[j+1] = 0x31; | |
| buffer0[j+2] = 0x00; | |
| buffer0[j+3] = 0x00; | |
| } | |
| for (int j = 0; j < BUF_SIZE; j+=4) { | |
| buffer1[j] = 0xF0; | |
| buffer1[j+1] = 0x00; | |
| buffer1[j+2] = 0xF0; | |
| buffer1[j+3] = 0x00; | |
| } | |
| // Setup DMA Descriptors for Circular Transfer | |
| dma_desc[0].size = BUF_SIZE; | |
| dma_desc[0].length = BUF_SIZE; | |
| dma_desc[0].owner = 1; | |
| dma_desc[0].sosf = 0; | |
| dma_desc[0].eof = 1; // Enable EOF interrupt | |
| dma_desc[0].buf = buffer0; | |
| dma_desc[0].qe.stqe_next = &dma_desc[1]; | |
| dma_desc[1].size = BUF_SIZE; | |
| dma_desc[1].length = BUF_SIZE; | |
| dma_desc[1].owner = 1; | |
| dma_desc[1].sosf = 0; | |
| dma_desc[1].eof = 1; // Enable EOF interrupt | |
| dma_desc[1].buf = buffer1; | |
| dma_desc[1].qe.stqe_next = &dma_desc[0]; // point back to the first buffer | |
| // Enable peripheral clocks | |
| SYSTEM.perip_clk_en0.reg_spi2_clk_en = 1; // note: this is a hack as this needs gating, its a shared register!!! cant be bothered right now for this test | |
| SYSTEM.perip_rst_en0.reg_spi2_rst = 1; | |
| SYSTEM.perip_rst_en0.reg_spi2_rst = 0; | |
| SYSTEM.perip_clk_en1.reg_dma_clk_en = 1; | |
| SYSTEM.perip_rst_en1.reg_dma_rst = 1; | |
| SYSTEM.perip_rst_en1.reg_dma_rst = 0; | |
| // Configure SPI2 | |
| spi_ll_master_init(hw); | |
| spi_ll_master_set_mode(hw, 0); | |
| spi_ll_set_tx_lsbfirst(hw, true); // send out LSB first instead of MSB first | |
| spi_line_mode_t linemode = {}; | |
| linemode.data_lines = 4; | |
| spi_ll_master_set_line_mode(hw, linemode); | |
| spi_ll_master_set_clock(&GPSPI2, 80000000, 2600000, 128); //2.7MHz -> 360ns per bit, 0-> 360ns, 1-> 720ns, total: 1440ns (slow, safe mode), min is 2.5MHz for 400ns pulse. at 2.6MHz the measured pulse width is 390ns and 780s so pretty much a sweet spot | |
| setupSPIPins(); | |
| spi_ll_set_mosi_bitlen(hw, TOTAL_BITS); | |
| spi_ll_enable_mosi(hw, true); | |
| spi_ll_dma_tx_enable(hw, true); | |
| spi_ll_dma_tx_fifo_reset(hw); | |
| spi_ll_outfifo_empty_clr(hw); | |
| spi_ll_apply_config(hw); | |
| // Configure GDMA | |
| gdma_dev_t *dma = &GDMA; | |
| gdma_ll_tx_reset_channel(dma, GDMA_CHANNEL); | |
| gdma_ll_tx_connect_to_periph(dma, GDMA_CHANNEL, GDMA_TRIG_PERIPH_SPI, 0); | |
| gdma_ll_tx_set_desc_addr(dma, GDMA_CHANNEL, (uint32_t)&dma_desc[0]); | |
| gdma_ll_tx_start(dma, GDMA_CHANNEL); | |
| // Enable EOF interrupt | |
| dma->intr[GDMA_CHANNEL].ena.out_eof = 1; | |
| dma->intr[GDMA_CHANNEL].clr.out_eof = 1; // clear eof interrupt flag | |
| // Attach interrupt handler note: when I cleared all interrupts here, the DMA would not start... | |
| esp_intr_alloc(ETS_DMA_CH0_INTR_SOURCE, ESP_INTR_FLAG_IRAM, gdma_isr_handler, NULL, NULL); | |
| issending = true; | |
| EncodeChunk(0); | |
| EncodeChunk(1); | |
| Serial.println("SPI DMA with interrupts start"); | |
| // Start SPI Transfer | |
| spi_ll_user_start(hw); | |
| } | |
| void loop() { | |
| if(issending) { | |
| delay(1); | |
| } | |
| else { | |
| // digitalWrite(21, LOW); | |
| delay(5); | |
| Serial.println("*"); | |
| while(!spi_ll_usr_is_done(hw)); // wait for finish transfer (TODO: this does not seem to hold up...) | |
| // reset SPI so it continues triggering the DMA | |
| spi_ll_dma_tx_fifo_reset(hw); | |
| spi_ll_outfifo_empty_clr(hw); | |
| spi_ll_apply_config(hw); | |
| // reset DMA so it starts a fresh frame | |
| gdma_dev_t *dma = &GDMA; | |
| gdma_ll_tx_reset_channel(dma, GDMA_CHANNEL); | |
| gdma_ll_tx_connect_to_periph(dma, GDMA_CHANNEL, GDMA_TRIG_PERIPH_SPI, 0); | |
| gdma_ll_tx_set_desc_addr(dma, GDMA_CHANNEL, (uint32_t)&dma_desc[0]); | |
| gdma_ll_tx_start(dma, GDMA_CHANNEL); | |
| issending = true; | |
| currentBuffer = 0; | |
| EncodeChunk(0); | |
| EncodeChunk(1); | |
| //gdma_ll_tx_restart(dma, GDMA_CHANNEL); // -> does not reset the dma buffer pointers | |
| spi_ll_user_start(hw); | |
| //buffer0NeedsRefill = false; | |
| //buffer1NeedsRefill = false; | |
| } | |
| // delay(200); | |
| //Serial.print("."); | |
| //spi_ll_user_start(hw); | |
| //while(!spi_ll_usr_is_done(hw)); // wait for finish transfer | |
| //delay(1); | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment