-
-
Save FlorianHeigl/bcb8a667372e776cd7032ad8cda9fa89 to your computer and use it in GitHub Desktop.
| # add one entry per port per card | |
| dhcp-host=00:0f:mac1,192.168.236.100,lios1octeth0,12h | |
| dhcp-host=00:0f:mac2,192.168.236.101,lios1octeth1,12h | |
| # match the vendor pattern to a tag | |
| # everything later matches to this tag | |
| dhcp-mac=set:lio,00:0f:b7:*:*:* | |
| # docs on options: https://blog.abysm.org/2020/06/human-readable-dhcp-options-for-dnsmasq/ | |
| # enable sntp support etc. | |
| dhcp-option = tag:lio, option:router, 192.168.236.1 | |
| dhcp-option = tag:lio, option:ntp-server, 192.168.236.1 | |
| # also simply hand out a time offset (not supported in this build) | |
| # dhcp-option = tag:lio, option:time-offset | |
| # bootcommand: | |
| # " * Added back NetOurBootCommand environment variable as DHCP option 224." braucht u-boot aus sdk 5.1! | |
| dhcp-option-force=224,"tftp ; bootoctlinux $(loadaddr) numcores=16 ip=dhcp console=ttyS0,115200 console=ttyPCI0 rootwait=10 panic | |
| # bootfile: this one boots the embedded rootfs with the 4.9 kernel (this works, but success also depends on the console settings): | |
| # having a pci stdout blocks boot | |
| # having no pci stdout means you can't interrupt autoboot | |
| # you'll figure it out, it just sucks because we don't know how to do it right | |
| dhcp-boot=tag:lio,erfs/vmlinux.64 | |
| # TODO: switch over to a mkimage u-boot script | |
| #dhcp-boot = tag:lio, ... | |
| #bootoctlinux ${loadaddr} coremask=f root=/dev/nfs rw console=ttyS0,115200 ip=bootp | |
| #bootoctlinux ${loadaddr} numcores=n root=/dev/ram console=ttyS0,115200 | |
| # file:///u_boot.pdf | |
| # idk what that pdf was, seems I thought it matters. |
| # docs: https://docs.u-boot.org/en/latest/usage/environment.html | |
# Reset the U-Boot environment of one card to its defaults, re-apply a fixed
# set of variables and save the result. All commands are sent over PCI with
# oct-remote-bootcmd.
#
# Usage: env-reset-script [card-index]
#   card-index  PCI index of the target card; defaults to 2 (PCI:2), which is
#               what this script always used before.
export OCTEON_REMOTE_PROTOCOL="PCI:${1:-2}"

oct-remote-bootcmd "env default -a" \
  || printf 'warning: command failed: %s\n' "env default -a" >&2

# The command list is fed to the loop on file descriptor 3 from a here-doc.
# It is deliberately NOT kept in a variable called LINES: bash maintains LINES
# as the terminal height and may overwrite it after an external command has
# run (shopt checkwinsize), which would replace the list with a bare number.
# Using fd 3 also keeps the list off stdin, so a command run inside the loop
# cannot swallow the remaining lines.
while IFS= read -r uboot_cmd <&3; do
  oct-remote-bootcmd "${uboot_cmd}" \
    || printf 'warning: command failed: %s\n' "${uboot_cmd}" >&2
  # pause between commands (presumably to give U-Boot time to process each
  # one - TODO confirm whether 3s is actually required)
  sleep 3
done 3<<'EOF'
setenv autoload n
setenv bootdelay -1; setenv baudrate 115200; setenv env_size 2000
setenv fdtaddr 80000; setenv fileaddr 0x20000000; setenv filesize 2000
setenv flash_base_addr 1fb00000; setenv flash_size 100000;
setenv liofsize 0x142840; setenv lionicaddr 0x400000; setenv loadaddr 0x20000000
setenv named_block_addr 0xfffeff80; setenv named_block_size 0x10000
setenv octeth0_speed 10
setenv octeth1_speed 25
setenv qlm4_mode sata
EOF

oct-remote-bootcmd "saveenv" \
  || printf 'warning: command failed: %s\n' "saveenv" >&2
| #setenv bootcmd 'run uefi_bootcmd; sf probe; sf read ${fileaddr} ${lionicaddr} ${liofsize}; bootoct forceboot numcores=${numcores}' | |
| #setenv uefi_bootcmd 'namedalloc __stage2_uefi 0x10000; cp64.b 0x1fb20000 ${named_block_addr} ${named_block_size}' |
| stdout=serial | |
| stderr=serial | |
| stdin=serial,bootcmd | |
| bootdelay=0 | |
| autoboot=0 | |
| autostart=yes | |
| bootretry=100 | |
| octeth0_speed=10 | |
| octeth1_speed=10 |
| bootdelay=10 | |
| autoboot=0 | |
| autostart=yes | |
| bootretry=100 | |
| pci_console_active | |
| serial_console_active=1 | |
| stdin=pci,serial,bootoct | |
| stdout=serial | |
| stderr=serial | |
| octeth0_speed=10 | |
| octeth1_speed=10 | |
| tftpwindowsize=16 | |
| tftpblocksize=1500 | |
| bootcmd=dhcp ; tftp ; bootoctlinux coremask=ffff root=/dev/ram rootwait=5 console=ttyS0,115200 console=ttyPCI0 ip=dhcp reboot=pci rootwait=10 panic=10 numcores=16 mem=8G | |
| saveenv |
| bootdelay=10 | |
| autoboot=0 | |
| pci_console_active=1 | |
| serial_console_active=1 | |
| stdin=pci,serial,bootoct | |
| stdout=serial,pci | |
| stderr=serial,pci | |
| octeth0_speed=10 | |
| octeth1_speed=10 | |
| bootcmd=mmc init; dhcp ; tftp ; bootoctlinux numcores=14 root=/dev/ram rootwait=5 console=ttyS0,115200 ip=dhcp reboot=pci rootwait=10 panic=10 mem=7G | |
| saveenv | |
| [root@lion firmware]# cat env.tftp | |
| bootdelay=10 | |
| autoboot=0 | |
| autostart=yes | |
| bootretry=100 | |
| pci_console_active | |
| serial_console_active=1 | |
| stdin=pci,serial,bootoct | |
| stdout=serial | |
| stderr=serial | |
| octeth0_speed=10 | |
| octeth1_speed=10 | |
| tftpwindowsize=16 | |
| tftpblocksize=1500 | |
| bootcmd=dhcp ; tftp ; bootoctlinux coremask=ffff root=/dev/ram rootwait=5 console=ttyS0,115200 console=ttyPCI0 ip=dhcp reboot=pci rootwait=10 panic=10 numcores=16 mem=8G | |
| saveenv |
| #!/usr/bin/env bash | |
#######################################
# Upload the U-Boot image to one card over PCI and start it with the
# env.dhcp environment file.
# Globals:   FIRMWARE_DIR (read, optional) - directory containing env.dhcp;
#            defaults to /root/firmware
# Arguments: $1 - PCI index of the target card (used as PCI:<index>)
# Returns:   exit status of oct-remote-boot, or non-zero if the firmware
#            directory cannot be entered
#######################################
load_fw() {
  local fw_dir="${FIRMWARE_DIR:-/root/firmware}"

  # --envfile is given as a relative path, so it must be resolved from the
  # firmware directory; do not try to boot a card from anywhere else.
  cd -- "${fw_dir}" || return

  # load (original note: "and wait 20s with the boot")
  OCTEON_REMOTE_PROTOCOL="PCI:${1}" oct-remote-boot \
    /tftpboot/u-boot-octeon_nic225e.bin --envfile=env.dhcp
  #if [ $? != 0 ]; then
  #  OCTEON_REMOTE_PROTOCOL=PCI:${1} timeout 20 oct-remote-reset
  #  OCTEON_REMOTE_PROTOCOL=PCI:${1} oct-remote-boot /tftpboot/u-boot-octeon_nic225e.bin --envfile=env.dhcp
  #fi
  # reset in case there is an SFP/XAUI problem with DAC detection; use a
  # different envfile for that
}
#######################################
# Tell U-Boot on one card to run DHCP and then execute the NetOurBootCommand
# environment variable.
# Arguments: $1 - PCI index of the target card (used as PCI:<index>)
# Returns:   status of the failing oct-remote-bootcmd call, otherwise the
#            status of the final "run NetOurBootCommand" call
#######################################
run_boot() {
  local -r pci_target="PCI:${1}"

  # If the dhcp command cannot even be delivered there is no point in
  # sending the boot command afterwards.
  OCTEON_REMOTE_PROTOCOL="${pci_target}" oct-remote-bootcmd "dhcp" || return
  # presumably gives the DHCP exchange time to finish - TODO confirm
  sleep 10
  OCTEON_REMOTE_PROTOCOL="${pci_target}" oct-remote-bootcmd "run NetOurBootCommand"
}
# Load the firmware onto every adapter in turn.
# The order in which the driver enumerates the cards does not match the
# order of the physical slots, and it can change.
failed_adapters=()
for _cnt in 0 2 4 6 8 10 12 14 16 18; do
#for _cnt in 0; do
  sleep 2
  clear
  echo "lade auf adapter #${_cnt}"
  if load_fw "${_cnt}"; then
    echo "geladen auf adapter #${_cnt}"
    sleep 5
    #echo "boote adapter #${_cnt}" &&
    #run_boot ${_cnt} &
  else
    # remember the failure: the next iteration clears the screen, so a
    # message printed here alone would be lost
    failed_adapters+=("${_cnt}")
  fi
done

# Report all failures once, after the last screen clear.
if (( ${#failed_adapters[@]} > 0 )); then
  echo "laden fehlgeschlagen auf adapter: ${failed_adapters[*]}" >&2
  exit 1
fi
# old
# OCTEON_REMOTE_PROTOCOL=PCI:${1} oct-remote-bootcmd "dhcp ; tftp ; bootoctlinux coremask=ffff root=/dev/ram rootwait=5 console=ttyS0,115200 console=ttyPCI0 ip=dhcp reboot=pci rootwait=10 panic=10 numcores=16 mem=8G"
my env looks like this
autoboot=0
autoload=y
autostart=yes
baudrate=115200
boardname=nic225e
bootcmd=bootoctlinux $(loadaddr) numcores=16 ip=dhcp console=ttyS0,115200 console=ttyPCI0 rootwait=10 panic=10 noinitrd
bootdelay=0
bootfile=erfs/vmlinux.64
bootretry=100
dnsip=192.168.236.1
env_size=2000
eth1addr=00:XXXXXXXX
ethact=octeth0
ethaddr=00XXXXXXXX
fdtaddr=80000
fileaddr=20000000
filesize=43bb3a0
flash_base_addr=1fb00000
flash_size=100000
gatewayip=192.168.236.1
hostname=lios2octeth0
ipaddr=192.168.236.110
loadaddr=0x20000000
netmask=255.255.255.0
numcores=16
numcoreshex=0x10
octeon_failsafe_mode=0
octeon_ram_mode=0
octeth0_speed=10
octeth1_speed=10
pci_console_active=1
serial#=4.XXXXXXXXXXXXXXXXXXXXX
serial_console_active=1
serverip=192.168.236.1
stderr=serial,pci
stdin=serial,pci,bootcmd
stdout=serial,pci
ver=U-Boot 2013.075ff195580 (Build time: Mar 31 2025 - 00:09:41)
Environment size: 948/8188 bytes
I've not been able to run the OS-side fw_printenv/fw_setenv yet (they need a proper configuration file) - once THAT works, many other things are unlocked.
just as an addendum, this is what the original 'hyperscaler' fw looks like with autoboot
NIC225E board revision major:4, minor:0, serial #: 4.0G1749-GBC000988
OCTEON CN2360-AAP pass 1.3, Core clock: 1500 MHz, IO clock: 800 MHz, DDR clock: 800 MHz (1600 Mhz DDR)
Net: octeth0, octeth1
Hit any key to stop autoboot: 0
Allocated 0x10000 bytes at address: 0xfffeff80, name: __stage2_uefi
SF: Detected MT25QL01G with page size 256 Bytes, erase size 4 KiB, total 128 MiB
SF: 1321024 bytes @ 0x400000 Read: OK
Bootloader: Done loading app on coremask:
0xffffase_addr=1fb00000
Using device tree
loadaddr=0x20000000
###################################################
WARNING: Software configured for older revision than running on.
Compiled for ID=0x000d9702, Chip is 0x000d9703
###################################################
octeth0_speed=10
Cores: 1500 MHz (boot: 0, disp: 15, ctrl: 0, ls: 14), 64-bit app
Node 0 Interface 0 has 4 ports (XFI)
Node 0 Interface 1 has 4 ports (XFI)
Node 0 Interface 3 has 128 ports (NPI)
Node 0 Interface 4 has 4 ports (LOOP)
[ DRV ] Active CN73xx PCI Queues: 128
[ DRV ] PCIPKOMAP (port, queue):
0: (256, 37) (257, 38) (258, 39) (259, 40) 025 - 00:09:41)
4: (260, 41) (261, 42) (262, 43) (263, 44)
8: (264, 45) (265, 46) (266, 47) (267, 48)
12: (268, 49) (269, 50) (270, 51) (271, 52)
16: (272, 53) (273, 54) (274, 55) (275, 56)
20: (276, 57) (277, 58) (278, 59) (279, 60)
24: (280, 61) (281, 62) (282, 63) (283, 64)
28: (284, 65) (285, 66) (286, 67) (287, 68) Firm: DXV8C03Q Ser#: S1E4NYAG510917
32: (288, 69) (289, 70) (290, 71) (291, 72)
36: (292, 73) (293, 74) (294, 75) (295, 76) 5385008 x 512)
40: (296, 77) (297, 78) (298, 79) (299, 80) 6 oct-remote-console 0
44: (300, 81) (301, 82) (302, 83) (303, 84)
48: (304, 85) (305, 86) (306, 87) (307, 88)
52: (308, 89) (309, 90) (310, 91) (311, 92) 8 oct-remote-console 0
56: (312, 93) (313, 94) (314, 95) (315, 96)
60: (316, 97) (317, 98) (318, 99) (319, 100)
64: (320, 101) (321, 102) (322, 103) (323, 104)
68: (324, 105) (325, 106) (326, 107) (327, 108)
72: (328, 109) (329, 110) (330, 111) (331, 112)
76: (332, 113) (333, 114) (334, 115) (335, 116)
80: (336, 117) (337, 118) (338, 119) (339, 120)
84: (340, 121) (341, 122) (342, 123) (343, 124)
88: (344, 125) (345, 126) (346, 127) (347, 128)
92: (348, 129) (349, 130) (350, 131) (351, 132)
96: (352, 133) (353, 134) (354, 135) (355, 136)
100: (356, 137) (357, 138) (358, 139) (359, 140)
104: (360, 141) (361, 142) (362, 143) (363, 144)
108: (364, 145) (365, 146) (366, 147) (367, 148)
112: (368, 149) (369, 150) (370, 151) (371, 152)
116: (372, 153) (373, 154) (374, 155) (375, 156)
120: (376, 157) (377, 158) (378, 159) (379, 160)
124: (380, 161) (381, 162) (382, 163) (383, 164)
DMA Queues 0-7 initialized
DPI_DMA_CONTROL: 0x010f07010008c000
DPI_ENGX_BUF(0): 0x0000000400000004
DPI_ENGX_BUF(1): 0x0000000400000044
DPI_ENGX_BUF(2): 0x0000000400000084
DPI_ENGX_BUF(3): 0x00000004000000c4
DPI_ENGX_BUF(4): 0x0000000400000104
DPI_ENGX_BUF(5): 0x0000000800000148
Warning: Enabling PKI when PKI already enabled.
if you want to make progress on this, pay attention to and learn about the DHCP option 224 they use to get a u-boot bootscript.
the second largest wound is rebooting. so booting, and rebooting both still suck. after that things will get better.
long-term suck is power management: I see the exact same power draw with the cards at the u-boot prompt and running kernel 4.9. maybe in 6.10+ it finally works
further suck is the port LEDs, not working on 4.9, can be a device tree or a driver issue. no clue
I'll just keep adding to this random gist instead of my issue tracker
zip offloading does not yet work (i left out 2 cores, what else do you need!)
/examples # ./zip abc
CVMX_SHARED: 0x102d0000-0x103b0000
Active coremask = 0x3fff
ERROR: cvmx_fpa3_reserve_aura: node=0 desired aura=5
ERROR: cvmx_fpa3_setup_aura_and_pool: Failed in AURA not available
appmain: ERROR: in ZIP buffer pool
this matters e.g. if you wanted to use zram (gzip compressed on multiple paging devices should be fast and low-latency)
crypto example works but ofc /proc/crypto doesn't indicate any offloading in the rest of the system.
"with an engineering mindset" this is a great state because it's just individual, unconnected small issues which are supposed to be easier to be solved.
let us celebrate, let us all make merry.
everyone!
let's shout!
scream out!
fyi: this is what I use and as you can see change around still per different test needs.
the process is still a bit fragile, since the ordering of the nics is still static and i have no good way of managing the boot
it would need integration with the tftp server logs or a heartbeat system to work better.
it especially can fail
the most important improvement would be to read VPD - specifically the serial number of a nic and to as such fixate the PCIe slots
to specific cards and then also manage the MAC addresses and port speeds. A possible workaround is to pass through only one NIC to the docker container with the oct-remote-tools. Then you could blacklist/unblacklist NICs from the host OS or just start a container on a specific PCI address, load firmware, expose the serial and mac. PCIe lockups could still be a nightmare, unless it's possible to toggle on/off hotplug of ports on the PCIe switch. The switch (ASMedia Technology Inc. Device 2824) seems to support it, but I have no further idea how to do any of that.
Still, the script already makes the process a lot easier and can safely load u-boot. With a tftp server the next step is also solved (I use dnsmasq on the host with the pcie switch board for now; oct-remote-tools of course could also run there without a docker container).
The other improvement is making much better use of dhcp/tftp: the card can fetch precompiled u-boot scripts which could "easily" do dynamic setup of interfaces based on script execution, or e.g. boot into a certain git-tag named nfs directory etc.
those are the things where it should go.
the additional script named `env-reset-script` is an attempt at fixing a messed up u-boot env.

warnings:
also watch out around
tftpwindowsize: