possible bug, segfault on OS X, Linux x86-64 (maybe related to conversion between string to (u)int32?)

  • From: Anders Bergh <anders1@xxxxxxxxx>
  • To: luajit@xxxxxxxxxxxxx
  • Date: Tue, 15 Oct 2013 04:59:31 +0200

Hi,

I wrote a script to parse some data out of time zone files, which randomly crashes. Ideally I would minimize it further, but every attempt so far just made it work without any crashes, and at least the script isn't too big.

It seems the crash is related to how I convert from a string to (u)int32_t, but small changes to the code will either make it work or break.

i32(s) and u32(s) are two functions for reversing endianness using string.reverse, they're only there to assert() that bit.bswap returns the same values.

Host info:

Mac OS X 10.9.0 (GM, x86-64), LuaJIT 2.0.2 (git: 5d25645a210f32dddecde9c50afb14f9ee63e180, latest as of 2013-10-15)

Linux (Debian, 3.9.3 kernel, x86-64, LuaJIT 2.0.2 release)

How to reproduce:

---

$ luajit ./zone.lua /usr/share/zoneinfo/Asia/Baku
{
  {ts=-1441163964, dst=false, name='BAKT', ut_offset=10800},
  ...
  {ts=2140041600, dst=false, name='AZT', ut_offset=14400}
}
Segmentation fault

---

I've also uploaded the "Asia/Baku" zone file from OS X, in case /usr/share/zoneinfo/Asia/Baku doesn't trigger the crash on your system:

http://fgsfd.se/~anders/Baku

--
Anders Bergh

--- 8< --- cut here --- 8< ---


local ffi = require 'ffi'
local bit = require 'bit'

ffi.cdef[[
struct tzhead {
        char tzh_magic[4];
        char tzh_version[1];
        char tzh_reserved[15];
        uint32_t tzh_ttisgmtcnt;
        uint32_t tzh_ttisstdcnt;
        uint32_t tzh_leapcnt;
        uint32_t tzh_timecnt;
        uint32_t tzh_typecnt;
        uint32_t tzh_charcnt;
};

struct tztype {
        int32_t ut_offset;
        uint8_t isdst;
        uint8_t abbridx;
};

struct tzleap {
        int32_t ttime;
        int32_t corr;
};
]]

local fn = arg[1]
local f = assert(io.open(fn, 'r'))

local function reverse(s)
        local t = {}
        for i=#s,1,-1 do t[#t+1] = s:sub(i,i) end
        return table.concat(t)
end

local function i32(s)
        local tmp = ffi.new('union { int32_t i; char x[4]; }')
        tmp.x = s
        tmp.i = bit.bswap(tmp.i)
        return tmp.i
end

local function u32(s)
        local tmp = ffi.new('union { uint32_t i; char x[4]; }')
        tmp.x = s
        tmp.i = bit.bswap(tmp.i)
        return tmp.i
end

local touint32, toint32
do
        local tmp = ffi.new('union { int32_t i; uint32_t u; char x[4]; }')
        local noop = function(n) return n end

        function toint32(s)
                tmp.x = s:reverse()
                local n = noop(tmp.i)
                assert(n==i32(s))
                return n
        end

        function touint32(s)
                tmp.x = s:reverse()
                local n = noop(tmp.u)
                assert(n==u32(s))
                return n
        end
end

local function readi32()
        return toint32(f:read(4))
end

local function readu32()
        return touint32(f:read(4 ))
end

local head = ffi.new('struct tzhead')
head.tzh_magic = f:read(4)
head.tzh_version = f:read(1)
head.tzh_reserved = f:read(15)
head.tzh_ttisgmtcnt = readu32()
head.tzh_ttisstdcnt = readu32()
head.tzh_leapcnt = readu32()
head.tzh_timecnt = readu32()
head.tzh_typecnt = readu32()
head.tzh_charcnt = readu32()

local times = ffi.new('int32_t[?]', head.tzh_timecnt)
for i=0, head.tzh_timecnt-1 do
        times[i] = readi32()
end

local timetypes = ffi.new('uint8_t[?]', head.tzh_timecnt)
for i=0, head.tzh_timecnt-1 do
        timetypes[i] = f:read(1):byte()
end

local types = ffi.new('struct tztype[?]', head.tzh_typecnt)
for i=0, head.tzh_typecnt-1 do
        types[i].ut_offset = readi32()
        types[i].isdst = f:read(1):byte()
        types[i].abbridx = f:read(1):byte()
end

local abbrs = ffi.new('char[?]', head.tzh_charcnt, f:read(head.tzh_charcnt))

local leaps = ffi.new('struct tzleap[?]', head.tzh_leapcnt)
for i=0, head.tzh_leapcnt-1 do
        leaps[i].ttime = readi32()
        leaps[i].corr = readi32()
end

local ttisstd = f:read(head.tzh_ttisstdcnt)
local ttisgmt = f:read(head.tzh_ttisgmtcnt)

print('{')
for i=0, head.tzh_timecnt-1 do
        local t = types[timetypes[i]]
io.write((' {ts=%d, dst=%s, name=\'%s\', ut_offset=%d}'):format(times[i], tostring(t.isdst==1), ffi.string(abbrs + t.abbridx), t.ut_offset))
        print(i < head.tzh_timecnt - 1 and ',' or '')
end
print('}')

f:close()


--- 8< --- cut here --- 8< ---


Other related posts: