The attached Lua code causes LuaJIT's custom allocator to badly fragment memory. LuaJIT reports 2.5 Mbytes of memory allocated, while VmRSS reports 926 Mbytes on an x86_64 Linux system. Using the system malloc (LUAJIT_USE_SYSMALLOC) with 32-bit LuaJIT doesn't have any fragmentation issues. The issue shows up when large chunks of memory (about 131 Kbytes) are shrunk down to around 270 bytes. It seems the allocator can't use the freed space in those chunks until they are completely freed. The script creates a large number of tables and then grows each table to a little over 8k entries and shrinks it down to 20 entries. After that it tries to re-use the memory that should be free, by creating the same number of tables but only filling them with 128 entries, but the allocator gets new chunks of memory from the system instead of using the memory that should be free. I first noticed this when playing around with Roberto's test script [1] that grows Lua stacks, but doesn't trigger a GC to shrink them. Manually calling the GC in the loop will allow the stacks to shrink (in both LuaJIT & standard Lua). But in LuaJIT the memory is not re-usable until the whole stacks are freed (i.e. the coroutines). At first I thought it was an issue with shrinking the stacks, so I created the attached script for testing with tables. 1. http://article.gmane.org/gmane.comp.lang.lua.general/93223 -- Robert G. Jakabosky
-- Phase 1 of the fragmentation test: create `lim` tables, grow each one to a
-- little over 8K entries, shrink it back down, then record Lua-reported
-- memory and the process RSS.

-- `a` holds the tables that are grown and shrunk; `b` is pre-filled with
-- numbers here and is overwritten with tables by the later fill phase.
local a = {}
local b = {}

--- Fill t[1..lim] so that each slot holds its own index.
-- @param t   table to fill
-- @param lim last index to set
local function grow_table(t, lim)
  -- grow table.
  for i = 1, lim do
    t[i] = i
  end
end

-- First index cleared by shrink_table; entries 1..MIN_SIZE-1 survive.
local MIN_SIZE = 20

--- Clear t[MIN_SIZE..lim] and then insert a new key.
-- @param t   table to shrink
-- @param lim last index to clear
local function shrink_table(t, lim)
  -- empty table.
  for i = MIN_SIZE, lim do
    t[i] = nil
  end
  -- force shrinking of table (the insertion of a new key is intended to
  -- make the table resize to fit its remaining contents).
  t[-1] = -1
end

--- Print and return the process resident set size, read from
-- /proc/self/status (Linux only).
-- @return the VmRSS value in kB as a number; 0 when the file cannot be
--   opened or read, or when no parsable VmRSS line is found, so callers can
--   always do arithmetic on the result.
local function report_rss()
  -- read memory stats on Linux
  io.write("process memory:\n")
  local status, open_err = io.open("/proc/self/status", "r")
  if not status then
    print("Failed to get RSS:", open_err)
    return 0
  end
  local ok, err_or_rss = pcall(function()
    local rss = 0
    for line in status:lines() do
      if line:sub(1, 5) == "VmRSS" then
        io.write(line, '\n')
        -- keep the previous value if the line does not match the pattern
        rss = tonumber(line:match(":%s*(%d+)%s*kB")) or rss
      end
    end
    return rss
  end)
  -- close the handle whether or not reading succeeded
  status:close()
  if not ok then
    print("Failed to get RSS:", err_or_rss)
    return 0
  end
  return err_or_rss
end

-- Number of tables to create. Defaults to 10000, or 7000 under LuaJIT on
-- x64; the first command-line argument overrides it.
local lim = 10000
if jit then
  -- check for 64bit system
  if jit.arch == 'x64' then
    lim = 7000
  end
end
-- `arg` only exists when started by the standalone interpreter.
if arg and arg[1] then
  local requested = tonumber(arg[1])
  if requested then
    lim = requested
  else
    io.stderr:write("ignoring non-numeric table count: ", tostring(arg[1]), "\n")
  end
end

for i = 1, lim do
  a[i] = {}
end
for i = 1, lim do
  b[i] = i
end

print(" **** **** RUNNING")

--- Memory in use as reported by collectgarbage("count"), rounded down.
local function mem()
  return math.floor(collectgarbage"count")
end

-- Grow and then shrink every table in `a`, reporting progress every 100
-- tables. Run under pcall so an error (e.g. out of memory) is reported
-- instead of aborting the script.
local stat, err = pcall(function(a, lim)
  local len = 8 * 1024 + 1 -- some length between 8K and 16K
  for i = 1, lim do
    grow_table(a[i], len)
    shrink_table(a[i], len)
    if (i % 100) == 0 then
      io.write(i, ' , mem Kb=', mem(), "\n")
    end
  end
end, a, lim)
if not stat then
  print("main loop error:", err)
end

-- Baseline measurements taken after the shrink phase.
local lmem1 = mem()
io.write('Lua mem Kb=', mem(), "\n")
local rss1 = report_rss()
-- try to fill memory holes.
-- Phase 2 of the fragmentation test: create `lim` new tables of 128 entries
-- each in `b`, then compare Lua-reported memory and process RSS against the
-- values recorded after the shrink phase (`lmem1`, `rss1`).
local len = 128
for i = 1, lim do
  b[i] = {}
  grow_table(b[i], len)
  if (i % 100) == 0 then
    io.write(i, ' , mem Kb=', mem(), "\n")
  end
end
io.write('Lua mem Kb=', mem(), "\n")
local lmem2 = mem()
local rss2 = report_rss()

-- Only subtract when both readings are numbers; a failed RSS read must not
-- abort the report with an arithmetic error.
local rss_diff = "n/a"
if type(rss1) == "number" and type(rss2) == "number" then
  rss_diff = rss2 - rss1
end
print("Process RSS(Kb): rss1=", rss1, ", rss2=", rss2, ", diff=", rss_diff)
print("Lua memory(Kb): mem1=", lmem1, ", mem2=", lmem2, ", diff=", (lmem2 - lmem1))
-- NOTE(review): `stat`/`err` come from the main-loop pcall, not from reading
-- memory stats, so this label looks stale -- confirm the intended check.
if not stat then print("read memory stats error:", err) end
print("-----------------------------------------------------------------------------------")
print("Free all tables.")
-- Drop every reference to the test tables and run a full collection, then
-- report RSS once more.
a = nil
b = nil
collectgarbage"collect"
report_rss()
-- pause before exit.
io.read("*l")