Exercise

Enhance your understanding of how SSDs work by getting hands-on practice ​with the exercise provided in this lesson!

We'll cover the following...

Simulator

This section introduces ssd.py, a simple SSD simulator you can use to understand better how SSDs work. See the previous lesson for details on how to run the simulator.

#! /usr/bin/env python

from __future__ import print_function
from collections import *
from optparse import OptionParser
import random
import string

class ssd:
    def __init__(self, ssd_type, num_logical_pages, num_blocks, pages_per_block,
                 block_erase_time, page_program_time, page_read_time,
                 high_water_mark, low_water_mark, trace_gc, show_state):
        # type
        self.TYPE_DIRECT = 1
        self.TYPE_LOGGING = 2
        self.TYPE_IDEAL = 3
        
        if ssd_type == 'direct':
            self.ssd_type = self.TYPE_DIRECT
        elif ssd_type == 'log':
            self.ssd_type = self.TYPE_LOGGING
        elif ssd_type == 'ideal':
            self.ssd_type = self.TYPE_IDEAL
        else:
            print('bad SSD type (%s)' % ssd_type)
            exit(1)
        
        # size
        self.num_logical_pages = num_logical_pages
        self.num_blocks = num_blocks
        self.pages_per_block = pages_per_block

        # parameters
        self.block_erase_time = block_erase_time
        self.page_program_time = page_program_time
        self.page_read_time = page_read_time

        # init each page of each block to INVALID
        self.STATE_INVALID = 1
        self.STATE_ERASED = 2
        self.STATE_VALID = 3
        
        self.num_pages = self.num_blocks * self.pages_per_block
        self.state = {}
        for i in range(self.num_pages):
            self.state[i] = self.STATE_INVALID

        # data itself
        self.data = {}
        for i in range(self.num_pages):
            self.data[i] = ' '

        # LOGGING stuff
        # reverse map: for each physical page, what LOGICAL page refers to it?
        # which page to write to right now?
        self.current_page = -1
        self.current_block = 0
        
        # gc counts
        self.gc_count = 0
        self.gc_current_block = 0
        self.gc_high_water_mark = high_water_mark
        self.gc_low_water_mark = low_water_mark
        
        self.gc_trace = trace_gc
        self.show_state = show_state

        # can use this as a log block
        self.gc_used_blocks = {}
        for i in range(self.num_blocks):
            self.gc_used_blocks[i] = 0

        # counts so as to help the GC
        self.live_count = {}
        for i in range(self.num_blocks):
            self.live_count[i] = 0

        # FTL
        self.forward_map = {}
        for i in range(self.num_logical_pages):
            self.forward_map[i] = -1

        self.reverse_map = {}
        for i in range(self.num_pages):
            self.reverse_map[i] = -1

        # stats
        self.physical_erase_count = {}
        self.physical_read_count = {}
        self.physical_write_count = {}

        for i in range(self.num_blocks):
            self.physical_erase_count[i] = 0
            self.physical_read_count[i] = 0
            self.physical_write_count[i] = 0

        self.physical_erase_sum = 0
        self.physical_write_sum = 0
        self.physical_read_sum = 0

        self.logical_trim_sum = 0
        self.logical_write_sum = 0
        self.logical_read_sum = 0

        self.logical_trim_fail_sum = 0
        self.logical_write_fail_sum = 0
        self.logical_read_fail_sum = 0
        return

    def blocks_in_use(self):
        used = 0
        for i in range(self.num_blocks):
            used += self.gc_used_blocks[i]
        return used

    def physical_erase(self, block_address):
        page_begin = block_address * self.pages_per_block
        page_end = page_begin + self.pages_per_block - 1
        
        for page in range(page_begin, page_end + 1):
            self.data[page] = ' '
            self.state[page] = self.STATE_ERASED

        # now, definitely NOT in use
        self.gc_used_blocks[block_address] = 0

        # STATS
        self.physical_erase_count[block_address] += 1
        self.physical_erase_sum += 1
        return

    def physical_program(self, page_address, data):
        self.data[page_address] = data
        self.state[page_address] = self.STATE_VALID
        # STATS
        self.physical_write_count[page_address / self.pages_per_block] += 1
        self.physical_write_sum += 1
        return 

    def physical_read(self, page_address):
        # STATS
        self.physical_read_count[page_address / self.pages_per_block] += 1
        self.physical_read_sum += 1
        return self.data[page_address]

    def read_direct(self, address):
        return self.physical_read(address)

    def write_direct(self, page_address, data):
        block_address = page_address / self.pages_per_block
        page_begin = block_address * self.pages_per_block
        page_end = page_begin + self.pages_per_block - 1

        old_list = []
        for old_page in range(page_begin, page_end + 1):
            if self.state[old_page] == self.STATE_VALID:
                old_data = self.physical_read(old_page)
                old_list.append((old_page, old_data))

        self.physical_erase(block_address)
        for (old_page, old_data) in old_list:
            if old_page == page_address:
                continue
            self.physical_program(old_page, old_data)
            
        self.physical_program(page_address, data)
        self.forward_map[page_address] = page_address
        self.reverse_map[page_address] = page_address
        return 'success'

    def write_ideal(self, page_address, data):
        self.physical_program(page_address, data)
        self.forward_map[page_address] = page_address
        self.reverse_map[page_address] = page_address
        return 'success'

    def is_block_free(self, block):
        first_page = block * self.pages_per_block
        if self.state[first_page] == self.STATE_INVALID or self.state[first_page] == self.STATE_ERASED:
            if self.state[first_page] == self.STATE_INVALID:
                self.physical_erase(block)
            self.current_block = block
            self.current_page = first_page
            self.gc_used_blocks[block] = 1
            return True
        return False

    def get_cursor(self):
        if self.current_page == -1:
            for block in range(self.current_block, self.num_blocks) + range(0, self.current_block):
                if self.is_block_free(block):
                    return 0
            return -1
        return 0

    def update_cursor(self):
        self.current_page += 1
        if self.current_page % self.pages_per_block == 0:
            self.current_page = -1
        return

    def write_logging(self, page_address, data, is_gc_write=False):
        if self.get_cursor() == -1:
            self.logical_write_fail_sum += 1
            return 'failure: device full'
        # NORMAL MODE writing
        assert(self.state[self.current_page] == self.STATE_ERASED)
        self.physical_program(self.current_page, data)
        self.forward_map[page_address] = self.current_page
        self.reverse_map[self.current_page] = page_address
        self.update_cursor()
        return 'success'

    def garbage_collect(self):
        blocks_cleaned = 0
        for block in range(self.gc_current_block, self.num_blocks) + range(0, self.gc_current_block):
            # don't GC the block currently being written to
            if block == self.current_block:
                continue

            # page to start looking for live blocks
            page_start = block * self.pages_per_block
            
            # is this page (and hence block) already erased? then don't bother
            if self.state[page_start] == self.STATE_ERASED:
                continue

            # collect list of live physical pages in this block
            live_pages = []
            for page in range(page_start, page_start + self.pages_per_block):
                logical_page = self.reverse_map[page]
                if logical_page != -1 and self.forward_map[logical_page] == page:
                    live_pages.append(page)

            # if ONLY live blocks, don't clean it! (why bother with move?)
            if len(live_pages) == self.pages_per_block:
                continue

            # live pages should be copied to current writing location
            for page in live_pages:
                # live: so copy it someplace new
                if self.gc_trace:
                    print('gc %d:: read(physical_page=%d)' % (self.gc_count, page))
                    print('gc %d:: write()' % self.gc_count)
                data = self.physical_read(page)
                self.write(self.reverse_map[page], data)

            # finally, erase the block and see if we're done
            blocks_cleaned += 1
            self.physical_erase(block)

            if self.gc_trace:
                print('gc %d:: erase(block=%d)' % (self.gc_count, block))
                if self.show_state:
                    print('')
                    self.dump()
                    print('')

            if self.blocks_in_use() <= self.gc_low_water_mark:
                # done! record where we stopped and return
                self.gc_current_block = block
                self.gc_count += 1
                return

        # END: block iteration
        return

    def upkeep(self):
        # GARBAGE COLLECTION
        if self.blocks_in_use() >= self.gc_high_water_mark:
            self.garbage_collect()
        # WEAR LEVELING: for future
        return

    def trim(self, address):
        self.logical_trim_sum += 1
        if address < 0 or address >= self.num_logical_pages:
            self.logical_trim_fail_sum += 1
            return 'fail: illegal trim address'
        if self.forward_map[address] == -1:
            self.logical_trim_fail_sum += 1
            return 'fail: uninitialized trim'
        self.forward_map[address] = -1
        return 'success'

    def read(self, address):
        self.logical_read_sum += 1
        if address < 0 or address >= self.num_logical_pages:
            self.logical_read_fail_sum += 1
            return 'fail: illegal read address'
        if self.forward_map[address] == -1:
            self.logical_read_fail_sum += 1
            return 'fail: uninitialized read'
        # USED for DIRECT and LOGGING and IDEAL
        return self.read_direct(self.forward_map[address])

    def write(self, address, data):
        self.logical_write_sum += 1
        if address < 0 or address >= self.num_logical_pages:
            self.logical_write_fail_sum += 1
            return 'fail: illegal write address'
        if self.ssd_type == self.TYPE_DIRECT:
            return self.write_direct(address, data)
        elif self.ssd_type == self.TYPE_IDEAL:
            return self.write_ideal(address, data)
        else:
            return self.write_logging(address, data)

    def printable_state(self, s):
        if s == self.STATE_INVALID:
            return 'i'
        elif s == self.STATE_ERASED:
            return 'E'
        elif s == self.STATE_VALID:
            return 'v'
        else:
            print('bad state %d' % s)
            exit(1)

    def stats(self):
        print('Physical Operations Per Block')
        print('Erases ', end='')
        for i in range(self.num_blocks):
            print('%3d        ' % self.physical_erase_count[i], end='')
        print('  Sum: %d' % self.physical_erase_sum)

        print('Writes ', end='')
        for i in range(self.num_blocks):
            print('%3d        ' % self.physical_write_count[i], end='')
        print('  Sum: %d' % self.physical_write_sum)

        print('Reads  ', end='')
        for i in range(self.num_blocks):
            print('%3d        ' % self.physical_read_count[i], end='')
        print('  Sum: %d' % self.physical_read_sum)
        print('')
        print('Logical Operation Sums')
        print('  Write count %d (%d failed)' % (self.logical_write_sum, self.logical_write_fail_sum))
        print('  Read count  %d (%d failed)' % (self.logical_read_sum, self.logical_read_fail_sum))
        print('  Trim count  %d (%d failed)' % (self.logical_trim_sum, self.logical_trim_fail_sum))
        print('')
        print('Times')
        print('  Erase time %.2f' % (self.physical_erase_sum * self.block_erase_time))
        print('  Write time %.2f' % (self.physical_write_sum * self.page_program_time))
        print('  Read time  %.2f' % (self.physical_read_sum * self.page_read_time))
        total_time = self.physical_erase_sum * self.block_erase_time + self.physical_write_sum * self.page_program_time + self.physical_read_sum * self.page_read_time
        print('  Total time %.2f' % total_time)
        return

    def dump(self):
        # FTL
        print('FTL   ', end='')
        count = 0
        ftl_columns = (self.pages_per_block * self.num_blocks) / 7
        for i in range(self.num_logical_pages):
            if self.forward_map[i] == -1:
                continue
            count += 1
            print('%3d:%3d ' % (i, self.forward_map[i]), end='')
            if count > 0 and count % ftl_columns == 0:
                print('\n      ', end='')
        if count == 0:
            print('(empty)', end='')
        print('')

        # FLASH?
        print('Block ', end='')
        for i in range(self.num_blocks):
            out_str = '%d' % i
            print(out_str + ' ' * (self.pages_per_block - len(out_str) + 1), end='')
        print('')

        max_len = len(str(self.num_pages))
        for n in range(max_len, 0, -1):
            if n == max_len:
                print('Page  ', end='')
            else:
                print('      ', end='')
            for i in range(self.num_pages):
                out_str = str(i).zfill(max_len)[max_len - n]
                print(out_str, end='')
                if i > 0 and (i+1) % 10 == 0:
                    print(end=' ')
            print('')

        print('State ', end='')
        for i in range(self.num_pages):
            print('%s' % self.printable_state(self.state[i]), end='')
            if i > 0 and (i+1) % 10 == 0:
                print(end=' ')
        print('')

        # DATA
        print('Data  ', end='')
        for i in range(self.num_pages):
            if self.state[i] == self.STATE_VALID:
                print('%s' % self.data[i], end='')
            else:
                print(' ', end='')
            if i > 0 and (i+1) % 10 == 0:
                print(end=' ')
        print('')

        # LIVE
        print('Live  ', end='')
        for i in range(self.num_pages):
            if self.state[i] == self.STATE_VALID and self.forward_map[self.reverse_map[i]] == i:
                print('+', end='')
            else:
                print(' ', end='')
            if i > 0 and (i+1) % 10 == 0:
                print(end=' ')
        print('')
        return




#
# MAIN PROGRAM
#
parser = OptionParser()
parser.add_option('-s', '--seed',            default=0,          help='the random seed',                         action='store', type='int',    dest='seed')
parser.add_option('-n', '--num_cmds',        default=10,         help='number of commands to randomly generate', action='store', type='int',    dest='num_cmds')
parser.add_option('-P', '--op_percentages',  default='40/50/10', help='if rand, percent of reads/writes/trims',  action='store', type='string', dest='op_percentages')
parser.add_option('-K', '--skew',            default='',         help='if non-empty, skew, e.g., 80/20: 80% of ops to 20% of blocks', action='store', type='string', dest='skew')
parser.add_option('-k', '--skew_start',      default=0,          help='if --skew, skew after this many writes',  action='store', type='int',    dest='skew_start')
parser.add_option('-r', '--read_fails',      default=0,          help='if rand, percent of reads that can fail', action='store', type='int',    dest='read_fail')
parser.add_option('-L', '--cmd_list',        default='',         help='comma-separated list of commands (e.g., r10,w20:a)', action='store', type='string', dest='cmd_list')
parser.add_option('-T', '--ssd_type',        default='direct',   help='SSD type: ideal, direct, log',            action='store', type='string', dest='ssd_type')
parser.add_option('-l', '--logical_pages',   default=80,         help='number of logical pages in interface',    action='store', type='int',    dest='num_logical_pages')
parser.add_option('-B', '--num_blocks',      default=12,         help='number of physical blocks in SSD',        action='store', type='int',    dest='num_blocks')
parser.add_option('-p', '--pages_per_block', default=10,         help='pages per physical block',                action='store', type='int',    dest='pages_per_block')
parser.add_option('-G', '--high_water_mark', default=10,         help='blocks used before gc trigger',           action='store', type='int',    dest='high_water_mark')
parser.add_option('-g', '--low_water_mark',  default=8,          help='gc target before stopping gc',            action='store', type='int',    dest='low_water_mark')
parser.add_option('-R', '--read_time',       default=10,         help='page read time (usecs)',                  action='store', type='int',    dest='read_time')
parser.add_option('-W', '--program_time',    default=40,         help='page program time (usecs)',               action='store', type='int',    dest='program_time')
parser.add_option('-E', '--erase_time',      default=1000,       help='page erase time (usecs)',                 action='store', type='int',    dest='erase_time')
parser.add_option('-J', '--show_gc',         default=False,      help='show garbage collector behavior',         action='store_true',           dest='show_gc')
parser.add_option('-F', '--show_state',      default=False,      help='show flash state',                        action='store_true',           dest='show_state')
parser.add_option('-C', '--show_cmds',       default=False,      help='show commands',                           action='store_true',           dest='show_cmds')
parser.add_option('-q', '--quiz_cmds',       default=False,      help='quiz commands',                           action='store_true',           dest='quiz_cmds')
parser.add_option('-S', '--show_stats',      default=False,      help='show statistics',                         action='store_true',           dest='show_stats')
parser.add_option('-c', '--compute',         default=False,      help='compute answers for me',                  action='store_true',           dest='solve')

(options, args) = parser.parse_args()

random.seed(options.seed)

print('ARG seed %s' % options.seed)
print('ARG num_cmds %s' % options.num_cmds)
print('ARG op_percentages %s' % options.op_percentages)
print('ARG skew %s' % options.skew)
print('ARG skew_start %s' % options.skew_start)
print('ARG read_fail %s' % options.read_fail)
print('ARG cmd_list %s' % options.cmd_list)
print('ARG ssd_type %s' % options.ssd_type)
print('ARG num_logical_pages %s' % options.num_logical_pages)
print('ARG num_blocks %s' % options.num_blocks)
print('ARG pages_per_block %s' % options.pages_per_block)
print('ARG high_water_mark %s' % options.high_water_mark)
print('ARG low_water_mark %s' % options.low_water_mark)
print('ARG erase_time %s' % options.erase_time)
print('ARG program_time %s' % options.program_time)
print('ARG read_time %s' % options.read_time)
print('ARG show_gc %s' % options.show_gc)
print('ARG show_state %s' % options.show_state)
print('ARG show_cmds %s' % options.show_cmds)
print('ARG quiz_cmds %s' % options.quiz_cmds)
print('ARG show_stats %s' % options.show_stats)
print('ARG compute %s' % options.solve)
print('')

s = ssd(ssd_type=options.ssd_type,
        num_logical_pages=options.num_logical_pages, num_blocks=options.num_blocks, pages_per_block=options.pages_per_block,
        block_erase_time=float(options.erase_time), page_program_time=float(options.program_time), page_read_time=float(options.read_time),
        high_water_mark=options.high_water_mark, low_water_mark=options.low_water_mark, trace_gc=options.show_gc, show_state=options.show_state)

#
# generate cmds (if not passed in by cmd_list)
#
hot_cold = False
skew_start = options.skew_start
if options.skew != '':
    hot_cold = True
    skew = options.skew.split('/')
    if len(skew) != 2:
        print('bad skew specification; should be 80/20 or something like that')
        exit(1)
    hot_percent = int(skew[0])/100.0
    hot_target = int(skew[1])/100.0

if options.cmd_list == '':
    max_page_addr = int(options.num_logical_pages)
    
    num_cmds = int(options.num_cmds)
    p = options.op_percentages.split('/')
    assert(len(p) == 3)
    percent_reads, percent_writes, percent_trims = int(p[0]), int(p[1]), int(p[2])
    if percent_writes <= 0:
        print('must have some writes, otherwise nothing in the SSD!')
        exit(1)
    
    printable = string.digits + string.letters
    cmd_list = []
    valid_addresses = []
    while len(cmd_list) < num_cmds:
        which_cmd = int(random.random() * 100.0)
        if which_cmd < percent_reads:
            # READ
            if random.randint(0, 99) < int(options.read_fail):
                address = random.randint(0, max_page_addr - 1)
            else:
                if len(valid_addresses) < 2:
                    continue
                address = random.choice(valid_addresses)
            cmd_list.append('r%d' % address)
        elif which_cmd < percent_reads + percent_writes:
            # WRITE
            if skew_start == 0 and hot_cold and random.random() < hot_percent:
                address = random.randint(0, int(hot_target * (max_page_addr - 1)))
            else:
                address = random.randint(0, max_page_addr - 1)
            if address not in valid_addresses:
                valid_addresses.append(address)
            data = random.choice(list(printable))
            cmd_list.append('w%d:%s' % (address, data))
            if skew_start > 0:
                skew_start -= 1
        else:
            # TRIM
            if len(valid_addresses) < 1:
                continue
            address = random.choice(valid_addresses)
            cmd_list.append('t%d' % address)
            valid_addresses.remove(address)
        
else:
    cmd_list = options.cmd_list.split(',')

s.dump()
print('')

show_state = options.show_state
show_cmds = options.show_cmds
quiz_cmds = options.quiz_cmds

if quiz_cmds:
    show_state = True

op = 0
for cmd in cmd_list:
    if cmd == '':
        break
    if cmd[0] == 'r':
        # r10
        address = int(cmd.split('r')[1])
        data = s.read(address)
        if show_cmds or (quiz_cmds and options.solve):
            print('cmd %3d:: read(%d) -> %s' % (op, address, data))
        elif quiz_cmds:
            print('cmd %3d:: read(%d) -> ??' % (op, address))
        op += 1
    elif cmd[0] == 'w':
        # w80:b
        parts = cmd.split(':')
        address = int(parts[0].split('w')[1])
        data = parts[1]
        rc = s.write(address, data)
        if show_cmds or (quiz_cmds and options.solve):
            print('cmd %3d:: write(%d, %s) -> %s' % (op, address, data, rc))
        elif quiz_cmds:
            print('cmd %3d:: command(??) -> ??' % op)
        op += 1
    elif cmd[0] == 't':
        address = int(cmd.split('t')[1])
        rc = s.trim(address)
        if show_cmds or (quiz_cmds and options.solve):
            print('cmd %3d:: trim(%d) -> %s' % (op, address, rc))
        elif quiz_cmds:
            print('cmd %d:: command(??) -> ??' % op)
        op += 1

    if show_state:
        print('')
        s.dump()
        print('')

    # Do GC?
    s.upkeep()

if not show_state:
    print('')
    s.dump()
print('')
if options.show_stats:
    s.stats()
    print('')




Simulator

Questions

  1. The exercise will mostly focus on the log-structured SSD, which is simulated with the “-T log” flag. We’ll use the other types of SSDs for comparison. First, run with flags -T log -s 1 -n 10 -q. Can you figure out which operations took place? Use -c to check your answers (or just use -C instead of -q -c). Use different values of -s to generate different random workloads.

  2. Now just show the commands and see if you can figure out the intermediate states of the Flash. Run with flags -T log -s 2 -n 10 -C to show each command. Now, determine the state of the Flash between each command; use -F to show ...