"""
Function to parse tProc assembly language programs.
"""
import re
# Function to parse program.
def parse_prog(file="prog.asm",outfmt="bin"):
    """
    Parses the .asm assembly language tProc program into a specified output format (binary or hex)

    The program is processed in three steps: the file is read and labels are
    collected, every instruction is parsed into a colon-separated intermediate
    form, and that form is encoded as a 64-bit machine word.

    Every problem found is printed (one "Error: ..." line per problem). If any
    instruction cannot be parsed or encoded, a ValueError is raised afterwards
    instead of returning a partial or corrupted program.

    :param file: ASM program file name
    :type file: str
    :param outfmt: Output format ("bin" or "hex")
    :type outfmt: str
    :return: Program keyed by instruction address. With "bin" each value is the
        64-character binary word; with "hex" each value is the 16-digit hex
        word followed by " -> " and the source instruction. An unrecognized
        outfmt prints an error and yields an empty dictionary.
    :rtype: dict
    :raises ValueError: if an instruction is malformed, references an unknown
        label, or has a field that does not fit its bit width
    """
    # Read the file: instructions keyed by address, plus label -> address.
    progList, symbList = _read_program(file)

    # First pass: parse instructions into the intermediate form.
    errors = 0
    for addr, entry in progList.items():
        parsed = _parse_instruction(addr, entry['inst'], entry['args'], symbList)
        if parsed is None:
            errors += 1
        else:
            entry['inst_parse'] = parsed
    if errors:
        raise ValueError("%d instruction(s) in \"%s\" could not be parsed" % (errors, file))

    # Second pass: convert to binary.
    for addr, entry in progList.items():
        code = _encode_instruction(addr, entry['inst_parse'])
        if code is None:
            errors += 1
            continue
        entry['inst_bin'] = code
        entry['inst_hex'] = "{:016x}".format(int(code, 2))
    if errors:
        raise ValueError("%d instruction(s) in \"%s\" could not be encoded" % (errors, file))

    # Write output.
    outProg = {}
    if outfmt == "bin":
        for addr, entry in progList.items():
            outProg[addr] = entry['inst_bin']
    elif outfmt == "hex":
        for addr, entry in progList.items():
            outProg[addr] = entry['inst_hex'] + " -> " + entry['inst'] + " " + entry['args']
    else:
        print("Error: \"%s\" is not a recognized output format" % outfmt)

    # Return program list.
    return outProg


# 8-bit opcode (as a binary string) of every supported mnemonic.
_OPCODES = {
    # I-type.
    'pushi': '00010000',
    'popi': '00010001',
    'mathi': '00010010',
    'seti': '00010011',
    'synci': '00010100',
    'waiti': '00010101',
    'bitwi': '00010110',
    'memri': '00010111',
    'memwi': '00011000',
    'regwi': '00011001',
    'setbi': '00011010',
    # J-type.
    'loopnz': '00110000',
    'condj': '00110001',
    'end': '00111111',
    # R-type.
    'math': '01010000',
    'set': '01010001',
    'sync': '01010010',
    'read': '01010011',
    'wait': '01010100',
    'bitw': '01010101',
    'memr': '01010110',
    'memw': '01010111',
    'setb': '01011000',
}

# 4-bit operation codes. The same code is reused by different instruction
# families (e.g. ">" and "&" are both "0000"); "0" means "no operation field".
_OPER_CODES = {
    "0": "0000",
    ">": "0000",
    ">=": "0001",
    "<": "0010",
    "<=": "0011",
    "==": "0100",
    "!=": "0101",
    "+": "1000",
    "-": "1001",
    "*": "1010",
    "&": "0000",
    "|": "0001",
    "^": "0010",
    "~": "0011",
    "<<": "0100",
    ">>": "0101",
    "upper": "1010",
    "lower": "0101",
}

# Source-line patterns. The alternation order of the mnemonics is significant
# and kept as in the original list (I-type, then J-type, then R-type).
_COMMENT_RE = re.compile(r"^\s*//")
_LINE_RE = re.compile(
    r"^((.+):)?"
    r"\s*(pushi|popi|mathi|seti|synci|waiti|bitwi|memri|memwi|regwi|setbi|"
    r"loopnz|condj|end|"
    r"math|set|sync|read|wait|bitw|memr|memw|setb)\s+(.+);",
    flags=re.MULTILINE,
)
# "end;" carries no arguments, so it never matches _LINE_RE.
_END_RE = re.compile(r"\s*(end);")

# Argument patterns shared by several instructions.
_ARGS_P_R_IMM = r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*(0?x?\-?[0-9a-fA-F]+)"
_ARGS_CH_P_R_T = r"\s*(\d+)\s*,\s*(\d+)\s*,\s*\$(\d+)\s*,\s*(\-?\d+)"
_ARGS_P_R_R = r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)"
_ARGS_CH_P_6R = (
    r"\s*(\d+)\s*,\s*(\d+)\s*,"
    r"\s*\$(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)"
)


def _forms(*pairs):
    """Compile (pattern, template) pairs into the tuple stored in _FORMATS."""
    return tuple((re.compile(pattern), template) for pattern, template in pairs)


# For every mnemonic: the accepted argument syntaxes, tried in order, and the
# intermediate form each one produces. "{N}" is the N-th regex group
# (0-based); "{addr}" is the resolved address of the label captured by the
# LAST group.
#   I-type:<inst>:page:channel:oper:ra:rb:rc:imm
#   J-type:<inst>:page:oper:ra:rb:rc:addr
#   R-type:<inst>:page:channel:oper:ra:rb:rc:rd:re:rf:rg:rh
_FORMATS = {
    # pushi p, $ra, $rb, imm
    'pushi': _forms((r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*,\s*(\-?\d+)",
                     "I-type:pushi:{0}:0:0:{2}:{1}:0:{3}")),
    # popi p, $r
    'popi': _forms((r"\s*(\d+)\s*,\s*\$(\d+)",
                    "I-type:popi:{0}:0:0:{1}:0:0:0")),
    # mathi p, $ra, $rb oper imm
    'mathi': _forms((r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*([\+\-\*])\s*(0?x?\-?[0-9a-fA-F]+)",
                     "I-type:mathi:{0}:0:{3}:{1}:{2}:0:{4}")),
    # seti ch, p, $r, t
    'seti': _forms((_ARGS_CH_P_R_T, "I-type:seti:{1}:{0}:0:0:{2}:0:{3}")),
    # synci t
    'synci': _forms((r"\s*(\d+)", "I-type:synci:0:0:0:0:0:0:{0}")),
    # waiti ch, t
    'waiti': _forms((r"\s*(\d+)\s*,\s*(\d+)", "I-type:waiti:0:{0}:0:0:0:0:{1}")),
    # bitwi p, $ra, $rb oper imm   |   bitwi p, $ra, ~imm
    'bitwi': _forms(
        (r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*([&|<>^]+)\s*(0?x?\-?[0-9a-fA-F]+)",
         "I-type:bitwi:{0}:0:{3}:{1}:{2}:0:{4}"),
        (r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*~\s*(0?x?\-?[0-9a-fA-F]+)",
         "I-type:bitwi:{0}:0:~:{1}:0:0:{2}"),
    ),
    # memri p, $r, imm
    'memri': _forms((_ARGS_P_R_IMM, "I-type:memri:{0}:0:0:{1}:0:0:{2}")),
    # memwi p, $r, imm
    'memwi': _forms((_ARGS_P_R_IMM, "I-type:memwi:{0}:0:0:0:0:{1}:{2}")),
    # regwi p, $r, imm
    'regwi': _forms((_ARGS_P_R_IMM, "I-type:regwi:{0}:0:0:{1}:0:0:{2}")),
    # setbi ch, p, $r, t
    'setbi': _forms((_ARGS_CH_P_R_T, "I-type:setbi:{1}:{0}:0:0:{2}:0:{3}")),
    # loopnz p, $r, @label
    'loopnz': _forms((r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*\@(.+)",
                      "J-type:loopnz:{0}:+:{1}:{1}:0:{addr}")),
    # condj p, $ra op $rb, @label
    'condj': _forms((r"\s*(\d+)\s*,\s*\$(\d+)\s*([<>=!]+)\s*\$(\d+)\s*,\s*\@(.+)",
                     "J-type:condj:{0}:{2}:0:{1}:{3}:{addr}")),
    # end (arguments, if any, are ignored: the empty pattern always matches)
    'end': _forms((r"", "J-type:end:0:0:0:0:0:0")),
    # math p, $ra, $rb oper $rc
    'math': _forms((r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*([\+\-\*])\s*\$(\d+)",
                    "R-type:math:{0}:0:{3}:{1}:{2}:{4}:0:0:0:0:0")),
    # set ch, p, $ra, $rb, $rc, $rd, $re, $rt
    'set': _forms((_ARGS_CH_P_6R, "R-type:set:{1}:{0}:0:0:{2}:{7}:{3}:{4}:{5}:{6}:0")),
    # sync p, $r
    'sync': _forms((r"\s*(\d+)\s*,\s*\$(\d+)",
                    "R-type:sync:{0}:0:0:0:0:{1}:0:0:0:0:0")),
    # read ch, p, oper $r
    'read': _forms((r"\s*(\d+)\s*,\s*(\d+)\s*,\s*(upper|lower)\s+\$(\d+)",
                    "R-type:read:{1}:{0}:{2}:{3}:0:0:0:0:0:0:0")),
    # wait ch, p, $r
    'wait': _forms((r"\s*(\d+)\s*,\s*(\d+)\s*,\s*\$(\d+)",
                    "R-type:wait:{1}:{0}:0:0:0:{2}:0:0:0:0:0")),
    # bitw p, $ra, $rb oper $rc   |   bitw p, $ra, ~$rb
    # The "~" form places its operand in the rc slot with rb = 0, mirroring
    # the "bitwi p, $ra, ~imm" form. (The original emitted a doubled colon
    # here, which produced an empty, unparsable register field.)
    'bitw': _forms(
        (r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*\$(\d+)\s*([&|<>^]+)\s*\$(\d+)",
         "R-type:bitw:{0}:0:{3}:{1}:{2}:{4}:0:0:0:0:0"),
        (r"\s*(\d+)\s*,\s*\$(\d+)\s*,\s*~\s*\$(\d+)",
         "R-type:bitw:{0}:0:~:{1}:0:{2}:0:0:0:0:0"),
    ),
    # memr p, $ra, $rb
    'memr': _forms((_ARGS_P_R_R, "R-type:memr:{0}:0:0:{1}:{2}:0:0:0:0:0:0")),
    # memw p, $ra, $rb
    'memw': _forms((_ARGS_P_R_R, "R-type:memw:{0}:0:0:0:{2}:{1}:0:0:0:0:0")),
    # setb ch, p, $ra, $rb, $rc, $rd, $re, $rt
    'setb': _forms((_ARGS_CH_P_6R, "R-type:setb:{1}:{0}:0:0:{2}:{7}:{3}:{4}:{5}:{6}:0")),
}


def _read_program(file):
    """Read the source file; return (instructions by address, label -> address)."""
    progList = {}
    symbList = {}
    addr = 0
    # The context manager guarantees the file is closed, even on error.
    with open(file, "r") as fd:
        for line in fd:
            # Whole-line comments do not occupy an address.
            if _COMMENT_RE.search(line):
                continue
            m = _LINE_RE.search(line)
            if m:
                # Tagged instruction: remember the address of its label.
                # Surrounding blanks are dropped so "  LOOP:" matches "@LOOP".
                if m.group(2):
                    symbList[m.group(2).strip()] = addr
                progList[addr] = {'inst': m.group(3), 'args': m.group(4)}
                addr += 1
            elif _END_RE.search(line):
                # Special case of the argument-less "end" instruction.
                progList[addr] = {'inst': 'end', 'args': ''}
                addr += 1
    return progList, symbList


def _parse_instruction(addr, inst, args, symbList):
    """Return the intermediate form of one instruction, or None after printing an error."""
    for pattern, template in _FORMATS.get(inst, ()):
        m = pattern.search(args)
        if not m:
            continue
        groups = m.groups()
        if "{addr}" not in template:
            return template.format(*groups)
        # Jump instruction: the last group is the label to resolve.
        label = groups[-1].strip()
        if label not in symbList:
            print("Error: could not resolve symbol %s on instruction @%d: %s %s" %(label,addr,inst,args))
            return None
        return template.format(*groups, addr=symbList[label])
    print("Error: bad format on instruction @%d: %s" %(addr,inst))
    return None


def _encode_instruction(addr, parsed):
    """Return the 64-bit binary word for an intermediate form, or None on error."""
    spl = parsed.split(":")
    # Every template in _FORMATS names a mnemonic present in _OPCODES.
    opcode = _OPCODES[spl[1]]
    if spl[0] == "I-type":
        # opcode(8) page(3) channel(3) oper(4) ra(5) rb(5) rc(5) imm(31)
        parts = [
            opcode,
            _unsigned2bin(spl[2], 3),
            _unsigned2bin(spl[3], 3),
            _op2bin(spl[4]),
            _unsigned2bin(spl[5], 5),
            _unsigned2bin(spl[6], 5),
            _unsigned2bin(spl[7], 5),
            _integer2bin(spl[8], 31),
        ]
    elif spl[0] == "J-type":
        # opcode(8) page(3) zeros(3) oper(4) ra(5) rb(5) rc(5) zeros(15) addr(16)
        parts = [
            opcode,
            _unsigned2bin(spl[2], 3),
            "0" * 3,
            _op2bin(spl[3]),
            _unsigned2bin(spl[4], 5),
            _unsigned2bin(spl[5], 5),
            _unsigned2bin(spl[6], 5),
            "0" * 15,
            _unsigned2bin(spl[7], 16),
        ]
    elif spl[0] == "R-type":
        # opcode(8) page(3) channel(3) oper(4) ra..rh(8 x 5) zeros(6)
        parts = [opcode, _unsigned2bin(spl[2], 3), _unsigned2bin(spl[3], 3), _op2bin(spl[4])]
        parts.extend(_unsigned2bin(reg, 5) for reg in spl[5:13])
        parts.append("0" * 6)
    else:
        print("Error: bad type on instruction @%d: %s" %(addr,parsed))
        return None
    # A None entry means a field was rejected (the error is already printed).
    if None in parts:
        return None
    return "".join(parts)


def _str2int(strin):
    """Convert a decimal or "0x"-prefixed hex string to int; None (after printing) if invalid."""
    try:
        if strin.startswith("0x"):
            return int(strin, 16)
        return int(strin, 10)
    except ValueError:
        print("Error: \"%s\" is not a valid number" % strin)
        return None


def _unsigned2bin(strin,bits=8):
    """Return strin as an unsigned, zero-padded binary string of the given width, or None."""
    dec = _str2int(strin)
    if dec is None:
        return None
    maxv = 2**bits - 1
    if dec < 0:
        print("Error: number %d is smaller than %d" %(dec,0))
        return None
    if dec > maxv:
        print("Error: number %d is bigger than %d" %(dec,maxv))
        return None
    return "{0:0{1}b}".format(dec, bits)


def _integer2bin(strin,bits=8):
    """Return strin as a two's-complement binary string of the given width, or None.

    A hex literal is taken as a raw bit pattern, so it may use the full
    unsigned range of the field; a decimal literal must fit the signed range.
    """
    dec = _str2int(strin)
    if dec is None:
        return None
    if strin.startswith("0x"):
        minv = 0
        maxv = 2**bits - 1
    else:
        minv = -2**(bits-1)
        maxv = 2**(bits-1) - 1
    if dec < minv:
        print("Error: number %d is smaller than %d" %(dec,minv))
        return None
    if dec > maxv:
        print("Error: number %d is bigger than %d" %(dec,maxv))
        return None
    # Negative numbers are stored in two's complement.
    if dec < 0:
        dec = dec + 2**bits
    return "{0:0{1}b}".format(dec, bits)


def _op2bin(op):
    """Return the 4-bit code of an operation; unknown operations print an error and give "1111"."""
    if op in _OPER_CODES:
        return _OPER_CODES[op]
    print("Error: operation \"%s\" not recognized" % op)
    return "1111"