add utf-8 to codepage mapping, change some asm syntax

This commit is contained in:
Redo
2022-11-10 15:24:50 -06:00
parent 2fe0e50942
commit c67a88e7d0
4 changed files with 182 additions and 43 deletions

View File

@@ -1,13 +1,25 @@
local arch8608 = require("rom-8608-defs")
-- Alternate spellings for instruction mnemonics: each key is a mnemonic string
-- and each value is a list of alias mnemonics for it.
-- NOTE(review): the visible part of instrsFromArch looks aliases up by trimmed
-- mnemonic and registers each alias with the same opcode; it appears to
-- reference both `aliases` and `arch.aliases` -- confirm which table is in use.
local aliases = {
["jpz imm8"] = {"jeq imm8"},
["jnz imm8"] = {"jne imm8"},
["jmp q" ] = {"ret" },
}
--- Load a UTF-8 character mapping table from a text file.
-- Every non-empty line has the form "<key> <hex> [<hex> ...]": the first
-- space-free token is the key, and each following token is parsed as a
-- base-16 number.
-- @param fn  path of the mapping file (opened with io.lines)
-- @return    table mapping each key string to an array of numbers
-- Raises an error naming the file and line number when a line has no value
-- part or when a value token is not valid hexadecimal.
local function loadutf8table(fn)
	local tt = {}
	local lineno = 0
	for l in io.lines(fn) do
		lineno = lineno + 1
		-- io.lines only removes the "\n"; drop a trailing "\r" so that blank
		-- lines in CRLF files are still recognised as empty.
		l = l:gsub("\r$", "")
		if l ~= "" then
			local c, d = l:match("^([^ ]+) (.+)$")
			if not c then
				error(string.format("%s:%d: malformed utf8 table line: %q", fn, lineno, l))
			end
			local t = {}
			for v in d:gmatch("[^ ]+") do
				local n = tonumber(v, 16)
				-- A nil here would otherwise be dropped silently, shortening
				-- the byte sequence emitted for this character.
				if not n then
					error(string.format("%s:%d: invalid hex value %q in utf8 table", fn, lineno, v))
				end
				t[#t + 1] = n
			end
			tt[c] = t
		end
	end
	return tt
end
-- Mapping table loaded once when this file runs. The "./" path is resolved
-- against the process's current working directory. The string-literal handling
-- in prefixCode indexes this table by the raw multi-byte character.
local utf8table = loadutf8table("./utf8table.txt")
--- Remove leading and trailing space characters (only " ", not tabs/newlines).
-- @param s  string to trim
-- @return   the trimmed string (a single value; gsub's match count is dropped)
local function trim(s)
	local withoutLeading = s:gsub("^ +", "")
	local withoutTrailing = withoutLeading:gsub(" +$", "")
	return withoutTrailing
end
--- Determine the total byte length of a multi-byte UTF-8 sequence from its
-- first byte.
-- @param c  string whose first byte is the lead byte of the sequence
-- @return   2, 3 or 4
-- Raises an error for any other first byte (including plain ASCII and
-- continuation bytes), reporting the byte in hexadecimal.
local function getutf8len(c)
	local lead = c:byte()
	-- 110xxxxx: two-byte sequence
	if bit.band(lead, 0xE0) == 0xC0 then
		return 2
	end
	-- 1110xxxx: three-byte sequence
	if bit.band(lead, 0xF0) == 0xE0 then
		return 3
	end
	-- 11110xxx: four-byte sequence
	if bit.band(lead, 0xF8) == 0xF0 then
		return 4
	end
	error("invalid utf8 first byte: "..string.format("%02X", lead))
end
local function validWordsFromInstrs(instrs)
local words = {}
for mnem, _ in pairs(instrs) do
@@ -303,6 +315,9 @@ local function prefixCode(code, fn) -- fix strings, add line numbers
local bracehasmid = {}
local lastnl = false
local utf8str = ""
local utf8len = 0
out(".ln 1"); out("\n");
for i = 1, #code do
local c = code:sub(i, i)
@@ -347,11 +362,24 @@ local function prefixCode(code, fn) -- fix strings, add line numbers
elseif state=="commentml" then
if c=="/" and cp=="*" then state = "code" end
elseif state=="string" then
if c=="\\" then state = "stringesc"
elseif c=="\"" then state = "code"
if c=="\\" then state = "stringesc"
elseif c=="\"" then state = "code"
elseif c:byte()>=128 then
utf8str = c
utf8len = getutf8len(c)
state = "stringutf8"
else outn(c:byte()) end
elseif state=="stringesc" then
outn(string.byte(stringEscapes[c] or error("invalid escape "..c))); state = "string";
elseif state=="stringutf8" then
utf8str = utf8str..c
if #utf8str == utf8len then
local valt = utf8table[utf8str]
if not valt then local datastr = ""; for i = 1, #utf8str do datastr = datastr .. string.format("%02X ", utf8str:sub(i, i):byte()) end;
error("Unrecognized UTF-8 character: "..datastr); end
for i, v in ipairs(valt) do outn(v) end
state = "string"
end
end
end
assert(#bracestack==0, "unclosed brace")
@@ -397,7 +425,7 @@ local function instrsFromArch(arch)
mnem = mnem:gsub("([%*%+%-])", " %1 ")
mnem = trim(mnem):gsub(" +", " ")
addMnem(mnem, instr.opcode)
local alias = aliases[trim(mnem)]
local alias = arch.aliases[trim(mnem)]
if alias then for _, v in ipairs(alias) do addMnem(v, instr.opcode) end end
end
end
@@ -566,13 +594,14 @@ local function strtovec(str) local v = {}; for word in str:gmatch("[^ \t\r\n]+")
--- Assemble a source file, print the result, then build and disassemble it.
-- The numeric arguments arrive as strings and are converted here.
-- @param fn        path of the assembly source; its last path component
--                  (after the final "/" or "\") is printed before the memory dump
-- @param romsizes  string turned into a list by strtovec; the list must have
--                  exactly 3 entries
-- @param offsets   string converted with tonumber and handed to buildMemory
-- @param lens      string converted with tonumber and handed to buildMemory
function AssembleFile(fn, romsizes, offsets, lens)
	local offset, len = tonumber(offsets), tonumber(lens)
	local romsize = strtovec(romsizes)
	local target = arch8608

	local mem = assembleFile(fn, target)

	local basename = fn:match("[^/\\]+$")
	print(basename.."\n")
	printMemory(mem)

	-- The size check deliberately stays after assembling and printing.
	assert(#romsize == 3, "incorrect rom size")
	buildMemory(mem, romsize, offset, len)
	disassembleMemory(mem, target)
end
-- Hand script source to the host through ts.eval (provided by the embedding
-- environment, not defined in this view). The script defines an AssembleFile
-- function that forwards its four arguments to the Lua AssembleFile via luacall.
-- NOTE(review): two definitions of the same script function appear here; the
-- second prefixes %fn with "Add-ons/_misc/rom/8608programs/" -- confirm that
-- only the intended one should remain.
ts.eval [[
function AssembleFile(%fn, %romsize, %offset, %len) { luacall("AssembleFile", %fn, %romsize, %offset, %len); }
function AssembleFile(%fn, %romsize, %offset, %len) { luacall("AssembleFile", "Add-ons/_misc/rom/8608programs/" @ %fn, %romsize, %offset, %len); }
]]
-- When the global `arg` table exists, assemble the file named by the first
-- argument, falling back to the bundled test program, with fixed rom size
-- "16 16 8", offset "0" and length "256".
if arg then AssembleFile(arg[1] or "../8608programs/test.asm", "16 16 8", "0", "256") end