[raspi-internals] Re: QPU Tutorials/Samples

  • From: Herman Hermitage <hermanhermitage@xxxxxxxxxxx>
  • To: "raspi-internals@xxxxxxxxxxxxx" <raspi-internals@xxxxxxxxxxxxx>
  • Date: Sat, 15 Feb 2014 14:51:28 +1200

I've pushed a basic assembler (written in js) to github.  See:


I would caution it's probably going to be a couple of days before it's robust
enough even for the most basic usage!


Simple Assembler:

A rudimentary assembler (very alpha at this stage, will improve with use). It 
needs Node.js to run (will target web page later).

NOTE: Rotator and Pack/Unpack are not yet supported.

  node[js] qpuasm.js [--showbits] [--dumpglobals] [--dumpsymbols] [--verbose] filename

Source Syntax:
  {[label:] [instruction/directive] [# comment] LF}


[addop] [; mulop] [; op]

Where addop, mulop and op are:
  op [dst [, src1 [, src2]]]

addop:
  nop, fadd, fsub, fmin, fmax, fminabs, fmaxabs, ftoi, itof, add, sub, shr, asr,
  ror, shl, min, max, and, or, xor, not, clz, v8adds, v8subs, mov

mulop:
  nop, fmul, mul24, v8muld, v8min, v8max, v8adds, v8subs, mov

op:
  bkpt, nop, thrsw, thrend, sbwait, sbdone, lthrsw, loadcv, loadc, ldcend,
  ldtmu0, ldtmu1, loadam, ldi, bra, brr

dst, src1, src2:
  a register reference: r0...r5 or ra0...ra63, or rb0...rb63, or special reg 
(vpm, unif, ...).
  a small constant


  .set    symbol, jsexpr
  .global symbol


nodejs qpuasm.js qpu-02.s


# Sample: write 5 rows of 16 words into the VPM, then transfer them to memory.
# Uniforms are read in this order: completion/irq flag, destination address.

# Helper symbols. Each .set value is a JavaScript expression (see the
# ".set symbol, jsexpr" directive); the trailing identifier names the function
# just declared so that the symbol refers to it. A statement must sit on a
# single source line, so these directives are not wrapped.
.set vw_layout, function vw_layout(row_step, element_stride, offset) { return (offset | 0xa00 | row_step << 12 | element_stride << 20); } vw_layout
.set vw_setup0, function vw_setup0(x, y) { return (2<<30|y<<23|x<<16); } vw_setup0
.set vw_setup1, function vw_setup1(x, y) { return (3<<30|x<<16|y); } vw_setup1

.global entry
.global exit

entry:
        # Determine if this QPU will signal on completion (flag is from uniforms)
        mov rb3, unif

        # Configure access to vpm
        # (offset argument omitted; the assembled word is 0x00101a00)
        ldi vw_setup, vw_layout(1, 1)

        # Write 5x16 words into vpm
        mov vpm, 1
        mov vpm, 2
        mov vpm, 4
        mov vpm, 8
        mov vpm, elem_num

        # Configure vpm write to memory
        ldi vw_setup, vw_setup0(5, 16)
        ldi vw_setup, vw_setup1(0, 0)

        # Trigger transfer to destination in memory (address is from uniforms)
        nop; mov vw_addr, unif

        # Wait for vpm transfer to finish
        mov.never -, vw_wait

exit:
        # Signal done
        mov irq, rb3
        nop; nop; thrend
        # NOTE(review): the assembled listing carries two further nop words
        # after thrend (0x68, 0x70) - confirm whether the assembler emits them
        # or whether they must be written out here.


/* Exported Symbols */
/* Byte offsets of the .global labels within the assembled program below;
   they match the entry and exit label markers in the listing. */
#define qpu_symbol_entry 0x00000000
#define qpu_symbol_exit 0x00000058

/* Assembled Program */
/* One instruction per row, emitted as two 32-bit words. The leading comment
   is the row's byte offset (rows advance by 8) and the trailing comment is
   the assembly statement for that row. */
/* entry: */
/* 0x00000000: */ 0x15827d80, 0x100210e7, /* mov rb3, unif */
/* 0x00000008: */ 0x00101a00, 0xe0021c67, /* ldi vw_setup, vw_layout(1, 1) */
/* 0x00000010: */ 0x159c1fc0, 0xd0020c27, /* mov vpm, 1 */
/* 0x00000018: */ 0x159c2fc0, 0xd0020c27, /* mov vpm, 2 */
/* 0x00000020: */ 0x159c4fc0, 0xd0020c27, /* mov vpm, 4 */
/* 0x00000028: */ 0x159c8fc0, 0xd0020c27, /* mov vpm, 8 */
/* 0x00000030: */ 0x159a7d80, 0x10020c27, /* mov vpm, elem_num */
/* 0x00000038: */ 0x88050000, 0xe0021c67, /* ldi vw_setup, vw_setup0(5, 16) */
/* 0x00000040: */ 0xc0000000, 0xe0021c67, /* ldi vw_setup, vw_setup1(0, 0) */
/* 0x00000048: */ 0x80827036, 0x100049f2, /* nop; mov vw_addr, unif */
/* 0x00000050: */ 0x159f2fc0, 0x100009e7, /* mov.never -, vw_wait */
/* exit: */
/* 0x00000058: */ 0x159c3fc0, 0x100209a7, /* mov irq, rb3 */
/* 0x00000060: */ 0x009e7000, 0x300009e7, /* nop; nop; thrend */
/* The two nop rows below follow thrend and end the program. */
/* 0x00000068: */ 0x009e7000, 0x100009e7, /* nop */
/* 0x00000070: */ 0x009e7000, 0x100009e7, /* nop */                             

Other related posts: