from enum import Enum
from math import ceil
import sys
import os
import textwrap
import copy
from prettytable import PrettyTable
def eprint(*args, **kwargs) -> None:
    """Print to standard error.

    Accepts exactly the same positional and keyword arguments as the
    built-in ``print`` (``sep``, ``end``, ``flush``); only the destination
    stream is fixed to ``sys.stderr``.
    """
    # The return annotation used to be ``void``, which is not a Python name and
    # raised NameError as soon as the module was imported.
    print(*args, file=sys.stderr, **kwargs)
# Segment start positions are rounded up to a multiple of this many bytecode
# cells (see the segment directive handling in main()).
PAGE_SIZE: int = 64
# Prefix written at the very start of every assembled output.
MAGIC_NUMBER: str = "@*--spryte-"
# Non-alphabetic characters that may appear in a label/identifier.
ALLOWED_LABEL_CHARS: str = "_"
# Every character that is tokenised as a CONTAINER token.
CONTAINERS: str = "{}[]()"
# Character that introduces an assembler directive (".define", ".text", ...).
DIRECTIVE_START: str = "."
# The only character the tokeniser skips as whitespace.
SPACE: str = " "
# Replacement text for the escape sequences \n, \e, \0 and \b inside string
# literals (see escape_char()).
# NOTE(review): NEWLINE is an empty string while its siblings are Unicode
# control pictures; this may be a glyph lost in transit -- confirm the intended
# value before relying on "\n" escapes.
NEWLINE: str = ""
ESCAPE: str = "␛"
NULL: str = "␀"
BELL: str = "␇"
# Delimiter placed around/after string and numeric operands in the output.
STRING_MARKER: str = "Ⓢ"
# Maps a segment directive name (written ".text", ".data", ".rodata" in source)
# to the numeric segment id used in the output header.
# The annotation uses subscript syntax: the previous ``dict(str, int)`` was a
# *call* evaluated at import time and raised TypeError.
SEGMENT_MAP: dict[str, int] = {
    "text": 0,
    "data": 1,
    "rodata": 2,
}
# Maps every accepted register spelling to its register number.  Registers
# 10-15 additionally accept a hexadecimal-style alias (ra..rf) and, for 12-15,
# a role alias (fp, sp, lr, pc).
# The annotation uses subscript syntax: the previous ``dict(str, int)`` was a
# *call* evaluated at import time and raised TypeError.
REGISTER_MAP: dict[str, int] = {
    "r0": 0,
    "r1": 1,
    "r2": 2,
    "r3": 3,
    "r4": 4,
    "r5": 5,
    "r6": 6,
    "r7": 7,
    "r8": 8,
    "r9": 9,
    "r10": 10, "ra": 10,
    "r11": 11, "rb": 11,
    "r12": 12, "rc": 12, "fp": 12,
    "r13": 13, "rd": 13, "sp": 13,
    "r14": 14, "re": 14, "lr": 14,
    "r15": 15, "rf": 15, "pc": 15,
}
class Conditional(Enum):
    """Condition codes that can be attached to a conditional opcode.

    The numeric value of a member is what gets emitted (as a decimal digit)
    into the instruction text.
    """

    LESS = 0       # "lt"
    LESS_EQ = 1    # "le"
    EQUAL = 2      # "eq"
    GREATER = 3    # "gt"
    GT_EQ = 4      # "ge"
    NOT_EQ = 5     # "ne"
    UNCOND = 6     # used when a conditional opcode is written without a suffix
# Maps the two-letter mnemonic suffix (e.g. the "eq" in "beq") to its condition
# code.  UNCOND deliberately has no suffix: it is selected when none is given.
# The annotation was ``dict(str, str)``: a call that raised TypeError at import
# time and also named the wrong value type (the values are Conditional members).
CONDITIONAL_MAP: dict[str, Conditional] = {
    "lt": Conditional.LESS,
    "le": Conditional.LESS_EQ,
    "eq": Conditional.EQUAL,
    "gt": Conditional.GREATER,
    "ge": Conditional.GT_EQ,
    "ne": Conditional.NOT_EQ,
}
class IdxMode(Enum):
    """Operand addressing modes.

    The numeric value is emitted (as a decimal digit) after the opcode
    character whenever the opcode supports more than one mode.
    """

    IMM = 0     # immediate value or label, no register operand
    REG = 1     # plain register operand
    ABS = 2     # [value] / [label] without a register
    ABSRO = 3   # [register + value]
    RI = 4      # [register]
    RIRO = 5    # [register + register]
class Opcode:
    """Static description of one machine instruction.

    Attributes:
        char: text emitted for the opcode itself.
        addr_modes: tuple of the addressing modes the trailing operand may
            use; empty when the instruction has no trailing operand.
        has_conditional: whether a condition suffix ("eq", "ne", ...) is
            accepted.
        has_target_reg / has_return / has_second_register: each True flag
            adds one leading register operand to the instruction.
    """

    def __init__(self, char: str, addr_modes: "tuple[IdxMode, ...]", has_conditional=False,
                 has_target_reg=True, has_return=False, has_second_reg=False):
        self.char = char
        # ``(IdxMode.REG)`` is just ``IdxMode.REG`` -- an easy slip that would
        # make ``in`` and ``len()`` raise TypeError later.  Accept a single
        # mode and always store a real tuple.
        if isinstance(addr_modes, Enum):
            addr_modes = (addr_modes,)
        self.addr_modes = tuple(addr_modes)
        self.has_conditional = has_conditional
        self.has_target_reg = has_target_reg
        self.has_return = has_return
        self.has_second_register = has_second_reg

    def has_idx_mode(self, mode: "IdxMode") -> bool:
        """Return True when *mode* is one of this opcode's addressing modes."""
        return mode in self.addr_modes
# Mnemonic -> instruction description.
# Two fixes relative to the previous table:
#   * the annotation ``dict(str, Opcode)`` was a call that raised TypeError at
#     import time; it is now a subscripted generic;
#   * "swap" passed ``(IdxMode.REG)``, which is a bare enum member rather than
#     a one-element tuple, so membership tests and ``len()`` on its addressing
#     modes raised TypeError.
OPCODE_MAP: dict[str, Opcode] = {
    # data movement
    "mov": Opcode("!", (IdxMode.IMM, IdxMode.REG)),
    "ldr": Opcode("\"", (IdxMode.ABS, IdxMode.ABSRO, IdxMode.RI, IdxMode.RIRO)),
    "str": Opcode("#", (IdxMode.ABS, IdxMode.ABSRO, IdxMode.RI, IdxMode.RIRO)),
    "swap": Opcode("$", (IdxMode.REG,)),
    # branches / calls
    "b": Opcode("%0", (IdxMode.IMM,),
                has_conditional=True, has_target_reg=False),
    "bs": Opcode("%1", (IdxMode.IMM,),
                 has_conditional=True, has_target_reg=False),
    "br": Opcode("%2", (),
                 has_conditional=True, has_target_reg=False),
    "bl": Opcode("&", (IdxMode.IMM,),
                 has_conditional=True, has_target_reg=False),
    "svc": Opcode("'", (), has_target_reg=False),
    # comparison and two-operand arithmetic
    "cmp": Opcode("(", (IdxMode.REG, IdxMode.IMM)),
    "add": Opcode(")", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "sub": Opcode("*", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "mul": Opcode("+", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "div": Opcode(",", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "pow": Opcode("-", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "root": Opcode(".", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "log": Opcode("/", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "mod": Opcode("0", (IdxMode.REG, IdxMode.IMM), has_return=True),
    # one-operand math
    "flr": Opcode("1", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "rnd": Opcode("2", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "cei": Opcode("3", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "sqrt": Opcode("4", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "sqr": Opcode("5", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "sin": Opcode("6", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "cos": Opcode("7", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "tan": Opcode("8", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    # string operations
    "join": Opcode("9", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "ltr": Opcode(":", (IdxMode.REG, IdxMode.IMM), has_return=True),
    "subs": Opcode(";", (IdxMode.REG, IdxMode.IMM), has_return=True, has_second_reg=True),
    "len": Opcode("<", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "upp": Opcode("=", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    "low": Opcode(">", (IdxMode.REG, IdxMode.IMM), has_return=True, has_target_reg=False),
    # stack
    "push": Opcode("?", (IdxMode.REG, IdxMode.IMM), has_return=False, has_target_reg=False),
    "pop": Opcode("@", (), has_return=True, has_target_reg=False),
}
# Mnemonics that are expanded by the assembler instead of mapping to a single
# entry of the opcode table.
# The previous annotation ``list(str)`` was a call and raised TypeError at
# import time.
PSEUDO_OPCODES: list[str] = [
    "adr",
]
class TokenType(Enum):
    """Lexical category assigned to a token by the tokeniser."""

    OPCODE = 0       # mnemonic found in the opcode table (optionally suffixed)
    REGISTER = 1     # name found in the register table
    VALUE = 2        # string literal or number
    LABEL = 3        # reference to a label
    LABEL_DEF = 4    # "name:" or a bare ":" (anonymous label)
    COMMA = 5
    CONTAINER = 6    # one of the bracket characters
    MACRO = 7        # name of a previously defined macro
    DIRECTIVE = 8    # ".name"
    IMMEDIATE = 9    # "#"
    ESCAPE = 10      # "\\" outside of a string
    ADD = 11         # "+"
    NEWLINE = 12     # end-of-line marker inserted by the tokeniser
    PSEUDO_OP = 13   # mnemonic found in the pseudo-opcode list
    ANON_LBL = 14    # ":+" / ":-" reference to an anonymous label
    INVALID = 15
class Token:
    """One lexical token together with its source position.

    Attributes:
        type: the token's TokenType.
        value: the token text (without quotes / directive dot / label colon).
        x: zero-based column of the token's first character.
        y: zero-based line index.
        source_file: file the token was read from ("" when unknown).
    """

    def __init__(self, type: "TokenType", value: str, x: int, y: int, source_file: str = ""):
        self.type = type
        self.value = value
        self.x = x
        self.y = y
        self.source_file = source_file

    # The return annotation is a string: the name ``Token`` is not bound yet
    # while the class body is executing, so the bare name raised NameError.
    def set_type(self, type: "TokenType") -> "Token":
        """Change the token's type in place and return the token (fluent)."""
        self.type = type
        return self

    def __str__(self):
        # "line:col | TYPE | 'value'" with one-based position, padded columns.
        return f"{self.y + 1}:{self.x + 1}".ljust(7) + f"| {self.type.name}".ljust(11) + f" | '{self.value}'"

    def __repr__(self):
        # Leading newline so that a list of tokens prints one token per line.
        return '\n ' + self.__str__()

    def copy(self):
        """Return a shallow copy of this token."""
        # ``copy`` here resolves to the module, not to this method: method
        # names live on the class, not in the function's enclosing scope.
        return copy.copy(self)
# Module-level state shared by the diagnostics helpers and the assembler.
file_name: str = ""
# Number of errors reported so far; incremented by visible_error().
error_count: int = 0
# Macro name -> tokens it expands to.
# The annotation is a string on purpose: the previous ``dict(str, list(Token))``
# was evaluated as a call at import time and raised TypeError.
macros: "dict[str, list[Token]]" = {}
def visible_code(data: list[str], x: int, y: int, source_file: str, range: int = 2) -> None:
    """Print an excerpt of source around a position to standard error.

    Prints up to ``range`` lines before line ``y``, line ``y`` itself, a
    green caret under column ``x``, then up to ``range`` lines after.

    Args:
        data: fallback source lines, used when ``source_file`` is not read.
        x: zero-based column to point at.
        y: zero-based line index to point at.
        source_file: path to re-read the lines from.  An empty string, or a
            name starting with a NUL character, means "use ``data``".
        range: number of context lines on each side.  (The name shadows the
            built-in, so the body below deliberately avoids calling range().)
    """
    if len(source_file) > 0 and source_file[0] != '\0':
        try:
            with open(source_file) as file:
                data = file.read().splitlines()
        except OSError:
            # Reporting a diagnostic must not itself crash: fall back to the
            # lines we were handed if the file can no longer be read.
            pass

    pad: int = len(str(y + 1 + range)) + 1
    first: int = max(y - range, 0)

    # Context before the position, plus the offending line itself.
    for line_no, line in enumerate(data[first:y + 1], start=first + 1):
        eprint(str(line_no).ljust(pad) + "| " + line)

    eprint("".ljust(pad) + "| " + "".ljust(x) + "\x1b[1;32m^\x1b[22;39m")

    # Context after the position.
    for line_no, line in enumerate(data[y + 1:y + range + 1], start=y + 2):
        eprint(str(line_no).ljust(pad) + "| " + line)
def visible_note(message: str, data: list[str], x: int, y: int, source_file: str, range: int = 2) -> None:
    """Print a "note:" diagnostic followed by the surrounding source lines.

    Args:
        message: text of the note.
        data: fallback source lines for the excerpt.
        x: zero-based column of the position.
        y: zero-based line index of the position.
        source_file: file name shown in the header and used for the excerpt.
        range: number of context lines on each side of the position.
    """
    # Annotations were ``list(str)`` / ``void``; both raised at definition time.
    eprint(f"\x1b[1m{source_file}:\x1b[35m{y + 1}\x1b[39m:\x1b[35m{x + 1}\x1b[39m:\x1b[30m note: \x1b[39m{message}\x1b[22m")
    visible_code(data, x, y, source_file, range)
def visible_warning(message: str, data: list[str], x: int, y: int, source_file: str, range: int = 2) -> None:
    """Print a "warning:" diagnostic followed by the surrounding source lines.

    Args:
        message: text of the warning.
        data: fallback source lines for the excerpt.
        x: zero-based column of the position.
        y: zero-based line index of the position.
        source_file: file name shown in the header and used for the excerpt.
        range: number of context lines on each side of the position.
    """
    # Annotations were ``list(str)`` / ``void``; both raised at definition time.
    eprint(f"\x1b[1m{source_file}:\x1b[35m{y + 1}\x1b[39m:\x1b[35m{x + 1}\x1b[39m:\x1b[35m warning: \x1b[39m{message}\x1b[22m")
    visible_code(data, x, y, source_file, range)
def visible_error(message: str, data: list[str], x: int, y: int, source_file: str, range: int = 2) -> None:
    """Print an "error:" diagnostic and count it.

    Increments the module-level ``error_count`` before printing the header and
    the surrounding source lines.

    Args:
        message: text of the error.
        data: fallback source lines for the excerpt.
        x: zero-based column of the position.
        y: zero-based line index of the position.
        source_file: file name shown in the header and used for the excerpt.
        range: number of context lines on each side of the position.
    """
    # Annotations were ``list(str)`` / ``void``; both raised at definition time.
    global error_count
    error_count += 1
    eprint(f"\x1b[1m{source_file}:\x1b[35m{y + 1}\x1b[39m:\x1b[35m{x + 1}\x1b[39m:\x1b[31m error: \x1b[39m{message}\x1b[22m")
    visible_code(data, x, y, source_file, range)
def escape_char(char: str) -> str:
    """Translate the character following a backslash inside a string literal.

    Returns the replacement text for a recognised escape letter and an empty
    string for anything else.
    """
    table = {
        "n": NEWLINE,
        "e": ESCAPE,
        "0": NULL,
        "b": BELL,
        "\\": "\\",
        "\"": "\"",
        "'": "'",
    }
    return table.get(char, "")
def read_word(data: list(str), macros: dict(str, list(Token)), source_file: str, x: int, y: int) -> tuple(int, Token):
class Mode(Enum):
NAME = 0
COMM = 1
CONT = 2
DIR = 3
STR = 4
IMM = 5
NUM = 6
SEMI = 7
ESC = 8
ADD = 9
LDEF = 10
INV = 11
def get_mode(char: str) -> Mode:
if (char.isnumeric() or char == "-"):
return Mode.NUM
if (char.isalpha() or char in ALLOWED_LABEL_CHARS):
return Mode.NAME
if (char == ","):
return Mode.COMM
if (char in CONTAINERS):
return Mode.CONT
if (char == DIRECTIVE_START):
return Mode.DIR
if (char in "\"'"):
return Mode.STR
if (char == "#"):
return Mode.IMM
if (char == ";"):
return Mode.SEMI
if (char == "\\"):
return Mode.ESC
if (char == "+"):
return Mode.ADD
if (char == ":"):
return Mode.LDEF
return Mode.INV
word: str = ""
line_length: int = len(data[y])
while (x < line_length and data[y][x] == SPACE):
x += 1
if (x == line_length):
return (-1, Token(TokenType.INVALID, word, x, y))
first_x: int = x
first_char: str = data[y][x]
mode: Mode = get_mode(first_char)
return_token: Token = Token(TokenType.INVALID, first_char, first_x, y, source_file)
x += 1
match (mode):
case Mode.COMM:
return (x, return_token.set_type(TokenType.COMMA))
case Mode.CONT:
return (x, return_token.set_type(TokenType.CONTAINER))
case Mode.INV:
visible_error(f"invalid character: '{first_char.replace("\n", "\\n")}'", data, first_x, y, source_file)
return (x, return_token)
case Mode.IMM:
return (x, return_token.set_type(TokenType.IMMEDIATE))
case Mode.ESC:
return (x, return_token.set_type(TokenType.ESCAPE))
case Mode.ADD:
return (x, return_token.set_type(TokenType.ADD))
case Mode.LDEF:
if (x < line_length and data[y][x] in "+-"):
return_token.value = data[y][x]
x += 1
return (x, return_token.set_type(TokenType.ANON_LBL))
else:
return_token.value = ""
return (x, return_token.set_type(TokenType.LABEL_DEF))
case Mode.SEMI:
return (-1, return_token)
case Mode.NAME | Mode.NUM:
word += first_char
if (mode == Mode.STR):
while (x < line_length and (char := data[y][x]) != first_char):
if (char == "\\"):
if (x == line_length - 1):
visible_error(f"missing terminating '\"' character", data, first_x, y, source_file)
return (x, Token(TokenType.INVALID, word, first_x, y)) # string not closed with backslash at end
x += 1
word += escape_char(data[y][x])
else:
word += char
x += 1
if (x == line_length):
visible_error(f"missing terminating '\"' character", data, first_x, y, source_file)
return (-1, Token(TokenType.INVALID, word, x, y))
x += 1
return_token.value = word
return (x, return_token.set_type(TokenType.VALUE))
elif (mode == Mode.NUM):
while (x < line_length and get_mode(char := data[y][x]) in (Mode.NUM, Mode.DIR)):
word += char
x += 1
return_token.value = word
try:
float(word)
return (x, return_token.set_type(TokenType.VALUE))
except ValueError:
return (x, return_token)
while (x < line_length and get_mode(char := data[y][x]) in (Mode.NAME, Mode.NUM)):
word += char
x += 1
return_token.value = word
if (mode == Mode.DIR):
if (len(word) == 0):
return (x, return_token)
return (x, return_token.set_type(TokenType.DIRECTIVE))
if (x < line_length and data[y][x] == ":"):
x += 1
return (x, return_token.set_type(TokenType.LABEL_DEF))
if (word in REGISTER_MAP):
return (x, return_token.set_type(TokenType.REGISTER))
if (word in OPCODE_MAP):
return (x, return_token.set_type(TokenType.OPCODE))
if (word in PSEUDO_OPCODES):
return (x, return_token.set_type(TokenType.PSEUDO_OP))
if (word in macros.keys()):
return (x, return_token.set_type(TokenType.MACRO))
if (len(word) > 2 and (subword := word[:-2]) in OPCODE_MAP):
return (x, return_token.set_type(TokenType.OPCODE))
return (x, return_token.set_type(TokenType.LABEL))
def get_full_path(path: str) -> str:
    """Resolve an include path to an existing file.

    ``path`` is returned unchanged when it already names a file.  Otherwise
    each directory listed in the ``SC_LIB_PATH`` environment variable is tried
    in order and the first existing ``directory/path`` is returned.

    Returns:
        The resolved path, or an empty string when no file was found.
    """
    if os.path.isfile(path):
        return path

    search_path = os.environ.get("SC_LIB_PATH")
    if search_path is None:
        return ""

    # os.pathsep / os.path.join instead of a hard-coded ":" and "/": the same
    # on POSIX, and correct on platforms where ":" is part of a drive name.
    for directory in search_path.split(os.pathsep):
        if not directory:
            # An empty entry (e.g. a trailing separator) used to be turned into
            # "/" + path, silently probing the filesystem root.
            continue
        candidate = os.path.join(directory, path)
        if os.path.isfile(candidate):
            return candidate
    return ""
def tokenise(data: list(str), file_name: str, tokens: list = [], macros: dict = {}, source_file: str = "") -> list:
global error_count
in_define: bool = False
define_single_line: bool = False
define_token: Token
define_name: str = ""
define_tokens: list(Token) = []
y: int = 0
while (y < len(data)):
x: int = 0
while (x < len(data[y])):
x, token = read_word(data, macros, source_file, x, y)
if (x == -1):
break
if (token.type == TokenType.MACRO):
if (in_define):
if (len(macros[token.value]) > 0 and macros[token.value][0].type == TokenType.VALUE):
if (len(define_tokens) > 0 and define_tokens[-1].type == TokenType.VALUE):
define_tokens[-1].value += macros[token.value][0].value
define_tokens += macros[token.value][1:]
continue
define_tokens += macros[token.value]
else:
if (len(macros[token.value]) > 0 and macros[token.value][0].type == TokenType.VALUE):
if (len(tokens) > 0 and tokens[-1].type == TokenType.VALUE):
tokens[-1].value += macros[token.value][0].value
tokens += macros[token.value][1:]
continue
tokens += macros[token.value].copy()
continue
if (token.type == TokenType.VALUE):
if (in_define):
if (len(define_tokens) > 0 and define_tokens[-1] == TokenType.VALUE):
define_tokens[-1] = define_tokens[-1].copy()
define_tokens[-1].value += token.value
continue
else:
if (len(tokens) > 0 and tokens[-1].type == TokenType.VALUE):
tokens[-1] = tokens[-1].copy()
tokens[-1].value += token.value
continue
if (token.type == TokenType.LABEL_DEF):
if (in_define):
visible_error("cannot define label inside macro", data, token.x, token.y, token.source_file)
break
tokens.append(token)
continue
if (token.type == TokenType.DIRECTIVE):
match (token.value):
case "deflines"|"define":
if (in_define):
visible_error("cannot define macro inside of definition", data, token.x, token.y, token.source_file)
break
define_token = token
define_single_line = (token.value == "define")
in_define = True
x, token = read_word(data, macros, source_file, x, y)
if (x > len(data[y]) or x == -1):
visible_error("undefined macro", data, len(data[y]), y)
break
if (token.type != TokenType.LABEL):
visible_error("invalid macro definition", data, token.x, token.y, token.source_file)
x = -1
break
define_name = token.value
continue
case "enddef":
if (not in_define):
visible_error("no define to close", data, token.x, token.y, token.source_file)
break
macros[define_name] = define_tokens.copy()
define_tokens.clear()
in_define = False
break
case "include":
x, path_token = read_word(data, macros, source_file, x, y)
if (path_token.type != TokenType.VALUE):
visible_error("expected string path", data, path_token.x, path_token.y, path_token.source_file)
break
full_path = get_full_path(path_token.value)
if (full_path != ""):
with open(full_path) as file:
tokenise(file.read().splitlines(), file_name = full_path.split("/")[-1],
tokens=tokens, macros=macros, source_file=full_path)
else:
visible_error("file doesn't exist", data, token.x, token.y, token.source_file)
break
if (in_define):
define_tokens.append(token)
else:
tokens.append(token)
if (in_define):
if (len(define_tokens) > 0):
if (define_tokens[-1].type == TokenType.ESCAPE):
define_tokens.pop()
else:
if (define_tokens[-1].type != TokenType.NEWLINE):
define_tokens.append(Token(TokenType.NEWLINE, "", len(data[y]), y))
if (define_single_line):
define_tokens = define_tokens[:-1]
macros[define_name] = define_tokens.copy()
define_tokens.clear()
in_define = False
else:
if (len(tokens) > 0):
if (tokens[-1].type == TokenType.ESCAPE):
tokens.pop()
else:
if (tokens[-1].type != TokenType.NEWLINE):
tokens.append(Token(TokenType.NEWLINE, "", len(data[y]), y))
y += 1
if (in_define):
visible_error("deflines not closed", data, define_token.x, define_token.y, define_token.source_file)
return tokens
def main() -> void:
global error_count
if len(sys.argv) == 1:
print("sc: error: no input files")
exit()
version_name: str = "bluby"
version_number: str = "0"
version_number_readable: str = "1.0.0"
entry_label: str = "_start"
data: list(str) = []
input_file: str = ""
output_file: str = ""
print_output: bool = False
verbose: bool = False
i = 1
while (i < len(sys.argv)):
if (sys.argv[i][0] == '-'):
if (len(sys.argv[i]) == 2):
opt = sys.argv[i][1:]
match (opt):
case 'o'|'out'|'output':
print_output = False
if (i + 1 == len(sys.argv)):
eprint(f"sc: expected output file after '-{opt}'")
exit()
i += 1
output_file = sys.argv[i]
output_dir: str = os.path.dirname(output_file)
if len(output_dir) > 0 and not os.path.isdir(output_dir):
eprint(f"sc: {os.path.dirname(output_file)}: does not exist")
exit()
case 'p'|'print':
print_output = True
output_file = ""
case 'v'|'verbose':
verbose = True
case _:
eprint(f"sc: unknown option '{opt}'")
else:
if len(input_file) == 0:
input_file = sys.argv[i]
else:
print("sc: error: only 1 input file allowed.")
exit()
i += 1
if len(input_file) == 0:
print("sc: error: no input files")
exit()
if not print_output and len(output_file) == 0:
output_file = os.path.splitext(input_file)[0] + '.out'
if os.path.isfile(input_file):
with open(input_file) as file:
data = file.read().splitlines()
else:
print(f"sc: error: invalid input file '{input_file}'")
exit()
file_name = '\0' + input_file.split("/")[-1]
macros: dict(str, list(Token)) = {}
tokens = tokenise(data, file_name, macros=macros, source_file=file_name)
bytecode: list(str) = []
labels: list(tuple(int, int, str)) = []
label_defines: dict(str, tuple(int, int)) = {}
segment_tokens: dict(int, int) = {}
segment_indices: dict(int, int) = {}
segment_offset: int = 0
last_anon_label_def: int = -1
forward_anon_labels: list(tuple(int, int, int)) = []
i: int = 0
while (i < len(tokens)):
token = tokens[i]
if (token.type == TokenType.LABEL_DEF):
if (len(token.value) == 0):
last_anon_label_def = len(bytecode) + segment_offset
for label_i in forward_anon_labels:
bytecode[label_i[0]] = str(last_anon_label_def - label_i[1]) + STRING_MARKER
forward_anon_labels.clear()
i += 1
token = tokens[i]
if (token.type == TokenType.NEWLINE):
i += 1
continue
if (token.value in label_defines):
visible_error(f"label '{token.value}' already defined", data, token.x, token.y, token.source_file)
first_token: Token = tokens[label_defines[token.value][1]]
visible_note("here", data, first_token.x, first_token.y)
while (tokens[i].type != TokenType.NEWLINE):
i += 1
i += 1
continue
label_defines[token.value] = (len(bytecode) + segment_offset, i)
i += 1
token = tokens[i]
if (token.type == TokenType.NEWLINE):
i += 1
continue
for _ in range(1): # I REALLY want to use 'break'. I'm sorry. Wait, no, I'm not sorry actually.
match (token.type):
case TokenType.PSEUDO_OP:
match token.value:
case "adr":
instruction = OPCODE_MAP["add"].char + str(IdxMode.IMM.value)
i += 1
token = tokens[i]
if (token.type != TokenType.REGISTER):
visible_error(f"expected register", data, token.x, token.y, token.source_file)
break
instruction += f"{REGISTER_MAP[token.value]:x}"
instruction += f"{REGISTER_MAP["pc"]:x}"
i += 1
token = tokens[i]
if (token.type != TokenType.COMMA):
visible_error(f"expected ','", data, token.x, token.y, token.source_file)
break
i += 1
token = tokens[i]
if (token.type == TokenType.IMMEDIATE):
i += 1
token = tokens[i]
bytecode.append(instruction)
if (token.type != TokenType.LABEL):
visible_error(f"expected label", data, token.x, token.y, token.source_file)
break
bytecode.append(str(-(len(bytecode) - 1 + segment_offset)) + STRING_MARKER)
labels.append((len(bytecode) - 1, i, token.value))
# subtract instruction position relative to the start of the segment
# and mark the label to be added later
case TokenType.OPCODE:
conditional: str = ""
opcode: Opcode
if (len(token.value) > 2 and (cond := token.value[-2:])):
if (cond in CONDITIONAL_MAP):
conditional = str(CONDITIONAL_MAP[cond].value)
opcode = OPCODE_MAP[token.value[:-2]]
if (not opcode.has_conditional):
visible_error(f"opcode does not have conditionals", data, token.x, token.y, token.source_file)
break
else:
opcode = OPCODE_MAP[token.value]
else:
opcode = OPCODE_MAP[token.value]
if (opcode.has_conditional and conditional == ""):
conditional = str(Conditional.UNCOND.value)
opcode_sym: str = opcode.char
arguments: str = ""
arg_count: int = int(opcode.has_return) + int(opcode.has_target_reg) + int(opcode.has_second_register)
previous_err_c: int = error_count
for arg in range(arg_count):
i += 1
token = tokens[i]
if (not token.type == TokenType.REGISTER):
visible_error(f"expected register", data, token.x, token.y, token.source_file)
break
else:
arguments += f"{REGISTER_MAP[token.value]:x}"
if (arg == arg_count - 1 and len(opcode.addr_modes) == 0):
continue
else:
i += 1
token = tokens[i]
if (tokens[i].type != TokenType.COMMA):
visible_error("expected ','", data, token.x, token.y, token.source_file)
break
# This is horrible, but I don't care. I'm evil.
if (error_count > previous_err_c):
break
i += 1
token = tokens[i]
args_end: int = i
is_immediate: bool = True
value: str = None
label: str = None
register_1: str = None
register_2: str = None
while (tokens[args_end].type != TokenType.NEWLINE):
args_end += 1
if (args_end > i):
if (len(opcode.addr_modes) == 0):
visible_error("instruction does not take any arguments", data, token.x, token.y, token.source_file)
break
args_end -= 1
else:
if (len(opcode.addr_modes) == 0):
bytecode.append(opcode_sym + conditional + arguments)
break
if (token.type == TokenType.CONTAINER):
if (token.value != '['):
visible_error("invalid expression", data, token.x, token.y, token.source_file)
break
is_immediate = False
i += 1
token = tokens[i]
if (is_immediate):
if (token.type == TokenType.IMMEDIATE):
i += 1
token = tokens[i]
if (not token.type in (TokenType.VALUE, TokenType.LABEL)):
visible_error("expected immediate value", data, token.x, token.y, token.source_file)
break
match (token.type):
case TokenType.REGISTER:
register_1 = token.value
arguments += f"{REGISTER_MAP[token.value]:x}"
case TokenType.VALUE:
value = token.value
case TokenType.LABEL:
label = token.value
value = str(-(len(bytecode) + segment_offset))
labels.append((len(bytecode) + 1, i, label))
case TokenType.ANON_LBL:
if (token.value == "-"):
if (last_anon_label_def == -1):
visible_error(f"matching ':' definition not found", data, token.x, token.y, token.source_file)
break
value = str(last_anon_label_def - (len(bytecode) + segment_offset))
else:
value = "+INVALID+"
forward_anon_labels.append((len(bytecode) + 1, len(bytecode) + segment_offset, i))
case _:
visible_error("expected register, value, or index", data, token.x, token.y, token.source_file)
break
i += 1
token = tokens[i]
if (token.type != TokenType.NEWLINE):
visible_error("expected end of line", data, token.x, token.y, token.source_file)
break
else:
previous_err_c: int = error_count
closed: bool = False
while (token.type != TokenType.NEWLINE):
if (token.type == TokenType.IMMEDIATE):
i += 1
token = tokens[i]
if (not token.type in (TokenType.VALUE, TokenType.LABEL)):
visible_error("expected immediate value", data, token.x, token.y, token.source_file)
break
match (token.type):
case TokenType.REGISTER:
if (not register_2 is None):
visible_error("cannot sum more than 2 registers", data, token.x, token.y, token.source_file)
break
arguments += f"{REGISTER_MAP[token.value]:x}"
if (not register_1 is None):
register_2 = token.value
else:
register_1 = token.value
case TokenType.LABEL:
if (not register_1 is None):
visible_error("cannot sum a relative label and a register", data, token.x, token.y, token.source_file)
break
if (not label is None):
visible_error("cannot sum two labels", data, token.x, token.y, token.source_file)
break
register_1 = "pc"
arguments += f"{REGISTER_MAP[register_1]:x}"
label = token.value
value = str(-(len(bytecode) + segment_offset))
labels.append((len(bytecode) + 1, i, label))
case TokenType.VALUE:
if (not value is None):
try:
value = str(int(value) + int(token.value))
except ValueError:
visible_error("index not integer", data, token.x, token.y, token.source_file)
break
else:
try:
value = str(int(token.value))
except ValueError:
visible_error("index not integer", data, token.x, token.y, token.source_file)
break
case _:
visible_error("invalid expression", data, token.x, token.y, token.source_file)
break
i += 1
token = tokens[i]
match (token.type):
case TokenType.CONTAINER:
if (token.value != ']'):
visible_error("invalid expression", data, token.x, token.y, token.source_file)
break
if (tokens[i + 1].type != TokenType.NEWLINE):
visible_error("expected end of line", data, token.x, token.y, token.source_file)
break
closed = True
case TokenType.ADD:
pass
case _:
visible_error("invalid expression", data, token.x, token.y, token.source_file)
break
i += 1
token = tokens[i]
# This is horrible, but I don't care.
# I look so good in chainmail that it's not renaissance fair.
if (error_count > previous_err_c):
break
if (not closed):
visible_error("expected ']'", data, token.x, token.y, token.source_file)
break
indexing_mode: IdxMode
if (is_immediate):
if (not register_1 is None):
indexing_mode = IdxMode.REG
else:
indexing_mode = IdxMode.IMM
else:
if (not register_1 is None):
if (not register_2 is None):
if (not label is None or not value is None):
visible_error("cannot sum 2 registers and an absolute value", data, token.x, token.y, token.source_file)
break
indexing_mode = IdxMode.RIRO
else:
if (not label is None or not value is None):
indexing_mode = IdxMode.ABSRO
else:
indexing_mode = IdxMode.RI
else:
if (not label is None or not value is None):
indexing_mode = IdxMode.ABS
if (not opcode.has_idx_mode(indexing_mode)):
visible_error(f"'{[key for key, val in OPCODE_MAP.items() if val == opcode][0]}' does not support {indexing_mode.name} indexing", data, token.x, token.y, token.source_file)
break
addr_mode_str = str(indexing_mode.value)
if (len(opcode.addr_modes) == 1):
addr_mode_str = ""
bytecode.append(opcode_sym + conditional + addr_mode_str + arguments)
if (not value is None or not label is None):
bytecode.append(STRING_MARKER)
if (not value is None):
bytecode[-1] = value + STRING_MARKER
case TokenType.DIRECTIVE:
if (token.value in SEGMENT_MAP):
key = SEGMENT_MAP[token.value]
if (len(bytecode) > 0 and len(segment_tokens) == 0):
segment_tokens[SEGMENT_MAP["text"]] = 0
segment_indices[SEGMENT_MAP["text"]] = 0
if (len(bytecode) in segment_indices.values()):
del segment_indices[list(segment_indices.keys())[list(segment_indices.values()).index(len(bytecode))]]
if (key in segment_tokens):
visible_error(f".{token.value} segment already defined", data, token.x, token.y, token.source_file)
initial_token: Token = tokens[segment_tokens[key]]
visible_note("here", data, initial_token.x, initial_token.y)
else:
segment_tokens[key] = i
segment_indices[key] = len(bytecode)
next_page_idx = ceil((len(bytecode) + segment_offset) / PAGE_SIZE) * PAGE_SIZE
segment_offset += next_page_idx - len(bytecode) + segment_offset
break
match (token.value):
case "segment"|"global":
pass
case "string":
i += 1
token = tokens[i]
if (token.type != TokenType.VALUE):
visible_error("expected string", data, token.x, token.y, token.source_file)
break
bytecode.append(STRING_MARKER + token.value + STRING_MARKER)
case "chars":
i += 1
token = tokens[i]
if (token.type != TokenType.VALUE):
visible_error("expected value", data, token.x, token.y, token.source_file)
break
bytecode += [char for char in token.value]
case "items":
i += 1
token = tokens[i]
if (token.type != TokenType.VALUE):
visible_error("expected integer", data, token.x, token.y, token.source_file)
break
try:
bytecode += ["0" for _ in range(int(token.value))]
except ValueError:
visible_error("expected integer", data, token.x, token.y, token.source_file)
break
case _:
visible_error(f"invalid directive '{token.value}'", data, token.x, token.y, token.source_file)
break
case _:
visible_error("expected expression", data, token.x, token.y, token.source_file)
while (tokens[i].type != TokenType.NEWLINE):
i += 1
i += 1
if (len(bytecode) > 0 and len(segment_tokens) == 0):
segment_tokens[SEGMENT_MAP["text"]] = 0
segment_indices[SEGMENT_MAP["text"]] = 0
if (len(forward_anon_labels) > 0):
for label in forward_anon_labels:
visible_error(f"matching ':' definition not found", data, tokens[label[2]].x, tokens[label[2]].y, tokens[label[2]].source_file)
for label in labels:
if (not label[2] in label_defines):
visible_error(f"undefined label '{label[2]}'", data, tokens[label[1]].x, tokens[label[1]].y, tokens[label[1]].source_file)
continue
offset_str: str = bytecode[label[0]][:-1]
offset: int = 0
if (len(offset_str) != 0):
offset = int(offset_str)
bytecode[label[0]] = str(label_defines[label[2]][0] + offset) + STRING_MARKER
if (not entry_label in label_defines):
eprint(f"sc: error: entry point '{entry_label}' not found")
error_count += 1
if (verbose):
table = PrettyTable()
table.title = "tokens"
table.field_names = ["index", "source", "type", "value"]
for i, token in enumerate(tokens):
table.add_row([i, f"{token.x}:{token.y}", token.type.name, textwrap.fill(token.value, width=10)])
print(table)
table.clear()
table.title = "bytecode"
table.field_names = ["index", "value"]
for i, inst in enumerate(bytecode):
table.add_row([i, textwrap.fill(inst, width=10)])
print(table)
table.clear()
table.title = "labels"
table.field_names = ["name", "bytecode idx", "token idx"]
for label in labels:
table.add_row([label[2], label[0], label[1]])
print(table)
table.clear()
table.title = "label defines"
table.field_names = ["name", "bytecode idx", "token idx"]
for key, value in label_defines.items():
table.add_row([key, value[0], value[1]])
print(table)
table.clear()
table.title = "macros"
table.field_names = ["name", "value"]
table.align = "l"
for key, value in macros.items():
table.add_row([key, value])
print(table)
if (error_count > 0):
eprint(f"sc: failed with {error_count} error{"s" if error_count > 1 else ""}")
return
output_code = MAGIC_NUMBER + version_name + version_number \
+ STRING_MARKER + version_number_readable + STRING_MARKER
output_code += str(label_defines[entry_label][0]) + STRING_MARKER
output_code += str(list(segment_indices.keys())[0]) + "=" + str(list(segment_indices.values())[0])
for key, value in list(segment_indices.items())[1:]:
output_code += ";" + str(key) + "=" + str(value)
output_code += STRING_MARKER
output_code += "".join(bytecode)
if (print_output or verbose):
print(output_code)
else:
with open(output_file, "w") as file:
file.write(output_code)
# Run the assembler only when the file is executed as a script, not when it is
# imported as a module.
if __name__ == "__main__":
    main()