Hello, I've been working on a VC4 backend, mostly to teach myself how to write LLVM backends. It's here: https://cowlark.com/llvm I haven't actually tried running any of the output yet, and it's got some scarily big code generation holes (e.g. no 64-bit support, and it has no knowledge of code size, which means it doesn't know how to generate near vs far branches), but the code quality looks like it'd be reasonable, even though there's no support for condition codes or pushes and pops. Question: does anyone know how to do 64-bit arithmetic on the VC4? It seems to be lacking adc and sbc instructions. As an example of the current (32-bit) output, here's a simple CRC16 routine that I stole off the internet: ---snip--- #define POLY 0x8408 unsigned short crc16(unsigned char *data_p, unsigned short length) { unsigned char i; unsigned int data; unsigned int crc = 0xffff; if (length == 0) return (~crc); do { for (i=0, data=(unsigned int)0xff & *data_p++; i < 8; i++, data >>= 1) { if ((crc & 0x0001) ^ (data & 0x0001)) crc = (crc >> 1) ^ POLY; else crc >>= 1; } } while (--length); crc = ~crc; data = crc; crc = (crc << 8) | (data >> 8 & 0xff); return (crc); } ---snip--- Compiled with clang -Os and llc -O3, I get: ---snip--- .text .file "test.ll" .globl crc16 .align 16 .type crc16,@function crc16: # @crc16 # BB#0: sub sp, #8 ;long st r6, #4 (sp) ;12-bit displacement # 4-byte Folded Spill st r7, #0 (sp) ;12-bit displacement # 4-byte Folded Spill mov r3, #65535 ;long mov r2, #0 beq r1, #0, BB0_5 b BB0_1 BB0_1: # %.preheader # =>This Loop Header: Depth=1 # Child Loop BB0_2 Depth 2 mov r4, #0 ldb r5, (r0) mov r6, r4 ;fast BB0_2: # Parent Loop BB0_1 Depth=1 # => This Inner Loop Header: Depth=2 mov r2, r3 ;fast eor r2, r5 ;short and r7, r2, #1 ;medium shr r3, #1 ;short mov r2, r3 ;fast eor r2, #33800 ;long cmp r7, r4 moveq r2, r3 shr r5, #1 ;short add r6, #1 ;short mov r7, r6 ;fast and r7, #255 ;long mov r3, r2 ;fast bne r7, #8, BB0_2 b BB0_3 BB0_3: # in Loop: Header=BB0_1 Depth=1 add r0, #1 ;short add r1, #-1 
;long mov r4, r1 ;fast and r4, #65535 ;long mov r3, r2 ;fast bne r4, #0, BB0_1 b BB0_4 BB0_4: not r2 mov r0, r2 ;fast shl r0, #8 ;short shr r2, #8 ;short and r2, #255 ;long or r2, r0 ;short BB0_5: and r2, #65535 ;long mov r0, r2 ;fast ld r7, #0 (sp) ;12-bit displacement # 4-byte Folded Spill ld r6, #4 (sp) ;12-bit displacement # 4-byte Folded Spill add sp, #8 ;long b lr Ltmp0: .size crc16, Ltmp0-crc16 ---snip--- -- ┌─── dg@cowlark.com ───── http://www.cowlark.com ───── │ "There does not now, nor will there ever, exist a programming │ language in which it is the least bit hard to write bad programs." --- │ Flon's Axiom
Attachment:
signature.asc
Description: OpenPGP digital signature