Array performance with 2.0.0-beta10 versus git HEAD

  • From: Peter Colberg <peter@xxxxxxxxxxx>
  • To: luajit@xxxxxxxxxxxxx
  • Date: Tue, 28 Aug 2012 12:19:49 -0400

Hi,

I noticed a reproducible difference in run time while experimenting with
a wrapper around C arrays (for bounds checks and a length operator).

When running the attached code with a relative script path,

    time /opt/luajit-2.0.0-beta10/bin/luajit-2.0.0-beta10 array_test.lua
    
    real    0m25.579s
    user    0m25.490s
    sys     0m0.004s
    
    time /opt/luajit-2.0.0-beta10-146-g751cd9d/bin/luajit-2.0.0-beta10-146-g751cd9d array_test.lua
    
    real    0m1.474s
    user    0m1.456s
    sys     0m0.008s

When running the code with an absolute script path,

    time /opt/luajit-2.0.0-beta10/bin/luajit-2.0.0-beta10 /tmp/array_test.lua
    
    real    0m1.532s
    user    0m1.512s
    sys     0m0.016s
    
    time /opt/luajit-2.0.0-beta10-146-g751cd9d/bin/luajit-2.0.0-beta10-146-g751cd9d /tmp/array_test.lua
    
    real    0m1.481s
    user    0m1.460s
    sys     0m0.012s

(In both cases, all files are in /tmp)

Why is there such a significant difference with 2.0.0-beta10, and not
with git HEAD? Could this actually be caused by the absolute versus
relative script path?

I built LuaJIT on a current Debian wheezy (x86_64) using

make amalg "CFLAGS=-fPIC -DLUAJIT_ENABLE_LUA52COMPAT -DLUAJIT_CPU_SSE2"

The runs were performed on an Intel Core i7-2640M CPU @ 2.80GHz.

Thanks,
Peter
-- Benchmark driver: builds an array of one million "vec3" elements and
-- doubles every component of every element, repeated 500 times.
local array = require("array")
-- `vec3` is not referenced below.
-- NOTE(review): presumably required for its side effect of declaring the
-- "vec3" C type that is passed by name to array() -- confirm.
local vec3 = require("vec3")

local n = 1000000
-- NOTE(review): the elements are read below without ever being written
-- first; the array module allocates its storage with malloc.
local a = array(n, "vec3")

for r = 1, 500 do
    -- `#a` and `a[i]` are served by the metamethods of the array wrapper.
    for i = 1, #a do
        -- `#a[i]` and `a[i][j]` are served by the metamethods of vec3.
        for j = 1, #a[i] do
            a[i][j] = 2 * a[i][j]
        end
    end
end
local ffi = require("ffi")

ffi.cdef[[
void *malloc(size_t size);
void free(void *ptr);
]]

--- Allocate `size` bytes through the C library's malloc.
-- The pointer is registered with ffi.gc so that `free` is used as its
-- finalizer.
-- @param size number of bytes to request
-- @return the `void *` cdata returned by malloc
-- Raises "out of memory" when malloc returns a NULL pointer.
local function malloc(size)
    local block = ffi.C.malloc(size)
    if block ~= nil then
        -- ffi.gc hands back the very same cdata object it was given.
        return ffi.gc(block, ffi.C.free)
    end
    return error("out of memory")
end

--- Create a bounds-checked, 1-based wrapper around a C array.
--
-- Storage for `n` elements of `dtype` is obtained from this file's `malloc`
-- helper. The returned value is an empty Lua table whose metatable
-- implements `__len`, `__index` and `__newindex` on top of that storage.
--
-- @param self  receiver supplied by the `__call` metamethod (unused)
-- @param n     number of elements
-- @param dtype C element type given as a string (default "double")
-- @return proxy table; indexing outside 1..n raises "index out of bounds"
local function array_new(self, n, dtype)
    dtype = dtype or "double"
    local size = n * ffi.sizeof(dtype)
    local data = malloc(size)
    -- Shift the typed pointer down by one element so that array[1] refers
    -- to the first element of the allocation.
    local array = ffi.cast(dtype .. " *", data) - 1

    -- NOTE(review): storing `data` here presumably keeps the original
    -- allocation (the cdata returned by malloc) reachable for as long as
    -- the proxy is, since the closures below only capture the cast pointer.
    local mt = {data = data}

    function mt:__len()
        return n
    end

    function mt:__index(i)
        -- Positive range test: NaN fails every comparison, so it is
        -- rejected here instead of slipping through to the raw pointer.
        if not (i >= 1 and i <= n) then
            return error("index out of bounds")
        end
        return array[i]
    end

    function mt:__newindex(i, v)
        -- Same NaN-safe range test as in __index.
        if not (i >= 1 and i <= n) then
            return error("index out of bounds")
        end
        array[i] = v
    end

    -- keep table empty, otherwise __index performance degrades
    return setmetatable({}, mt)
end

-- Module value: an empty table made callable through `__call`, so calling
-- the module as `array(n, dtype)` runs array_new with the table as `self`.
return setmetatable({}, {__call = array_new})
local ffi = require("ffi")

ffi.cdef[[
typedef struct vec3 { double data[3]; } vec3;
]]

-- Metatable giving the `vec3` C struct 1-based, bounds-checked access to
-- the doubles stored in its `data` member.
local mt = {}

-- Number of components; matches `double data[3]` in the struct declaration.
local n = 3

--- Length operator: always reports `n` components.
function mt:__len()
    return n
end

--- Read component `i` (1-based).
-- @param i component index, 1..n
-- @return the stored double
-- Raises "index out of bounds" for anything outside 1..n.
function mt:__index(i)
    -- Positive range test: NaN fails every comparison, so it is rejected
    -- here instead of slipping through to the raw C array access.
    if not (i >= 1 and i <= n) then
        return error("index out of bounds")
    end
    -- The C array is 0-based.
    return self.data[i - 1]
end

--- Write component `i` (1-based).
-- @param i component index, 1..n
-- @param v value to store
-- Raises "index out of bounds" for anything outside 1..n.
function mt:__newindex(i, v)
    -- Same NaN-safe range test as in __index.
    if not (i >= 1 and i <= n) then
        return error("index out of bounds")
    end
    self.data[i - 1] = v
end

-- Module value: the result of attaching `mt` to the "vec3" C type with
-- ffi.metatype.
return ffi.metatype("vec3", mt)

Other related posts: