[Coursera][From Nand to Tetris / Part I] 第六周 汇编器项目 python 实现

今天折腾一上午，终于完成了 Coursera 上 From Nand to Tetris / Part I 这个课程的最后一个汇编器项目。这套课程真是没白跟，收获良多，现在已经等不及想看下一期的软件部分了，哈哈。

下面是我的 python 实现，存个档，同时给同样在看这课程的同学们参考。

我没多少 Python 编码经验，注释风格是自己拍脑袋想的，看起来可能有点奇怪，还望包涵。下面稍微解释一下：

#-----------------#
# 大块代码用途描述 #
#-----------------#

## 分级注释

### 分级注释

#### 分级注释

import sys
import os.path


#--------#
# tables #
#--------#

## symbol table

# Predefined symbols mapped to their numeric addresses.
# The first five names take the values 0..4 in the order listed.
SYMB_TABLE = dict(
    (name, address)
    for address, name in enumerate(("SP", "LCL", "ARG", "THIS", "THAT"))
)
# R0..R15 alias addresses 0..15.
SYMB_TABLE.update(("R%d" % n, n) for n in range(16))
# Fixed addresses for the two remaining predefined symbols.
SYMB_TABLE["SCREEN"] = 16384
SYMB_TABLE["KBD"] = 24576

## comp table

# Six-bit codes for every comp mnemonic written in terms of A (or using no
# register at all).  The seven-bit COMP_TABLE value is a one-bit prefix
# followed by this code.
_COMP_CODES = (
    ("0",   "101010"),
    ("1",   "111111"),
    ("-1",  "111010"),
    ("D",   "001100"),
    ("A",   "110000"),
    ("!D",  "001101"),
    ("!A",  "110001"),
    ("-D",  "001111"),
    ("-A",  "110011"),
    ("D+1", "011111"),
    ("A+1", "110111"),
    ("D-1", "001110"),
    ("A-1", "110010"),
    ("D+A", "000010"),
    ("D-A", "010011"),
    ("A-D", "000111"),
    ("D&A", "000000"),
    ("D|A", "010101"),
)

# Every listed mnemonic gets prefix "0".
COMP_TABLE = dict((name, "0" + code) for name, code in _COMP_CODES)

# Each mnemonic mentioning A also has an M twin: same six-bit code, prefix "1".
COMP_TABLE.update(
    (name.replace("A", "M"), "1" + code)
    for name, code in _COMP_CODES
    if "A" in name
)

## dest table

# Dest mnemonics in code order: each one maps to its position in this tuple
# written as a three-digit binary string ("null" -> "000" ... "AMD" -> "111").
DEST_TABLE = dict(
    (mnemonic, format(index, "03b"))
    for index, mnemonic in enumerate(
        ("null", "M", "D", "MD", "A", "AM", "AD", "AMD")
    )
)

## jump table

# Jump mnemonics in code order: each one maps to its position in this tuple
# written as a three-digit binary string ("null" -> "000" ... "JMP" -> "111").
JUMP_TABLE = dict(
    (mnemonic, format(index, "03b"))
    for index, mnemonic in enumerate(
        ("null", "JGT", "JEQ", "JGE", "JLT", "JNE", "JLE", "JMP")
    )
)


#------------------#
# helper functions #
#------------------#

## determine is Int

def isInt(str):
    """Report whether ``int()`` can parse the argument.

    Returns True when ``int(str)`` succeeds and False when it raises
    ValueError.  Any other exception raised by ``int()`` propagates.
    """
    try:
        int(str)
    except ValueError:
        return False
    else:
        return True

## determine instruction type

def getInsType(ins):
    """Classify an instruction by its first character.

    Returns 'a' when the instruction starts with '@', otherwise 'c'.
    Indexes ``ins[0]`` directly, so an empty string raises IndexError.
    """
    return 'a' if ins[0] == '@' else 'c'

## split instruction

### instruction A

# Next address to hand out to a symbol seen for the first time; starts at 16
# and is incremented by valueOfAIns each time it allocates one.
ram_variable_num = 16

def valueOfAIns(ins):
    """Translate an '@' instruction into a 16-character binary string.

    The text after the leading '@' is resolved in this order:

    1. a symbol already present in SYMB_TABLE -> its stored value;
    2. text accepted by isInt()               -> that integer;
    3. anything else                          -> a new entry is added to
       SYMB_TABLE at the address held in ``ram_variable_num``, which is
       then incremented.

    SYMB_TABLE and isInt are defined elsewhere in this file.

    Returns the value as binary text, zero-padded on the left to 16 digits.

    Raises ValueError when the resolved value is negative or above 32767:
    such a value cannot be written as a 16-digit word with a leading '0',
    and would otherwise be emitted as a malformed line.
    """
    global ram_variable_num

    symbol = ins[1:]

    # `in` replaces dict.has_key(), which only exists on Python 2.
    if symbol in SYMB_TABLE:
        value = int(SYMB_TABLE[symbol])
    elif isInt(symbol):
        value = int(symbol)
    else:
        # First sighting of this symbol: allocate the next free address.
        value = ram_variable_num
        SYMB_TABLE[symbol] = value
        ram_variable_num += 1

    if not 0 <= value <= 0x7FFF:
        raise ValueError(
            "A-instruction value out of range (0..32767): %r" % (ins,)
        )

    return format(value, '016b')

### instruction C

def splitCIns(ins):
    """Break an instruction of the form ``dest=comp;jump`` into its fields.

    Returns a dict with keys 'dest', 'jump' and 'comp'.  When the text has
    no '=' the dest is the string 'null'; when the remainder has no ';' the
    jump is the string 'null'.  Only the first two pieces of each split are
    used, so anything after a second '=' or a second ';' is ignored.
    """
    eq_fields = ins.split('=')
    if len(eq_fields) == 1:
        dest, remainder = 'null', eq_fields[0]
    else:
        dest, remainder = eq_fields[0], eq_fields[1]

    semi_fields = remainder.split(';')
    jump = 'null' if len(semi_fields) == 1 else semi_fields[1]

    return {'dest': dest, 'jump': jump, 'comp': semi_fields[0]}


#------------#
# main logic #
#------------#

## first pass

### source file

sf_name = sys.argv[1]

### destination file

df_name = os.path.splitext(sf_name)[0] + ".tmp"

# Number of instruction lines written so far, i.e. the index the next
# instruction will get; a label is bound to this value.
line_num = 0

# `with` closes both files even if a line raises part-way through the pass.
with open(sf_name, 'r') as sf, open(df_name, 'w') as df:
    for ins in sf:
        # drop everything from the first '//' onwards
        ins = ins.split('//')[0]

        # trim surrounding white space and skip lines that end up empty
        ins = ins.strip()
        if not ins:
            continue

        # "(NAME)" declares a label: record it and emit nothing
        if ins[0] == '(' and ins[-1] == ')':
            SYMB_TABLE[ins[1:-1]] = line_num
            continue

        df.write(ins + '\n')
        line_num += 1

## second pass

### source file

sf_name = os.path.splitext(sf_name)[0] + ".tmp"

### destination file

df_name = os.path.splitext(sf_name)[0] + ".hack"

# `with` closes both files even if translation raises (for example a
# KeyError on a mnemonic missing from one of the lookup tables).
with open(sf_name, 'r') as sf, open(df_name, 'w') as df:
    for ins in sf:
        ins = ins.strip()

        ins_type = getInsType(ins)
        if ins_type == 'a':
            val = valueOfAIns(ins) + '\n'
            df.write(val)
        elif ins_type == 'c':
            parts = splitCIns(ins)
            # output word = '111' + comp bits + dest bits + jump bits
            val = ('111'
                   + COMP_TABLE[parts['comp']]
                   + DEST_TABLE[parts['dest']]
                   + JUMP_TABLE[parts['jump']]
                   + '\n')
            df.write(val)

[Coursera][From Nand to Tetris / Part I] 第六周汇编器项目 python 实现

AlanZhang

引用和评论

自己给 steamdeck 换肩键微动

python与nodejs哪个性能高

Anaconda安装教程以及Anaconda和pip配置国内镜像

如何减少跨团队交付摩擦？——基于 DevOps 与敏捷的最佳实践

Python 描述符

科学计算编程涉及到的技术栈简介

使用 chardet 判断文件编码需要注意的坑——过大的文件会导致高耗时

[Coursera][From Nand to Tetris / Part I] 第六周 汇编器项目 python 实现

AlanZhang

引用和评论

自己给 steamdeck 换肩键微动

python与nodejs哪个性能高

Anaconda安装教程以及Anaconda和pip配置国内镜像

如何减少跨团队交付摩擦？——基于 DevOps 与敏捷的最佳实践

Python 描述符

科学计算编程涉及到的技术栈简介

使用 chardet 判断文件编码需要注意的坑——过大的文件会导致高耗时

[Coursera][From Nand to Tetris / Part I] 第六周汇编器项目 python 实现