~ecc/tvm-backdoor-test-bert

5bb0f9b225240c9782f96a67f5f2c254fb213a73 — Eleanor Clifford 5 months ago a7ae169
Create object file
1 file changed, 34 insertions(+), 24 deletions(-)

M test_bert.py
M test_bert.py => test_bert.py +34 -24
@@ -8,7 +8,7 @@ from gluonnlp.data.bert.glue import truncate_seqs_equal, concat_sequences

import tvm
from tvm import relay
-import tvm.contrib.graph_runtime as runtime
+from tvm.runtime.executor import aot_executor

import argparse



@@ -48,10 +48,14 @@ def convert_examples_to_features(example, tokenizer=None, truncate_length=512, c
        return input_ids, segment_ids, valid_length

parser = argparse.ArgumentParser(description='Test BERT for backdoors')
-parser.add_argument('--test_accuracy',
+parser.add_argument('--test-accuracy',
                    action="store_true",
                    help='Test the accuracy of the model')

+parser.add_argument('--from-lib',
+                    action="store_true",
+                    help='Run from already compiled model')
+
parser.add_argument('--model',
                    type=str,
                    default='ethos',


@@ -72,33 +76,39 @@ _, vocab = nlp.model.get_model(
	use_pooler=True,
	use_decoder=False,
	use_classifier=False)
-model = mx.gluon.nn.SymbolBlock.imports(f'trained_models/{prefix}-symbol.json',
-                                               ['data0', 'data1', 'data2'],
-                                               f'trained_models/{prefix}-0000.params',
-                                               ctx=mx_ctx)
-model.hybridize(static_alloc=True, static_shape=True)
tokenizer = nlp.data.BERTTokenizer(vocab=vocab, lower=True)

batch = 1
seq_length = 64
dtype = "float32"

-# First, Convert the MXNet model into TVM Relay format
-shape_dict = {
-	'data0': (batch, seq_length),
-	'data1': (batch, seq_length),
-	'data2': (batch,)
-}
-mod, params = relay.frontend.from_mxnet(model, shape_dict)
-
-target = "llvm"
-with relay.build_config(opt_level=3):
-	graph, lib, cparams = relay.build(mod, target, params=params)
+if not args.from_lib:
+	model = mx.gluon.nn.SymbolBlock.imports(f'trained_models/{prefix}-symbol.json',
+	                                        ['data0', 'data1', 'data2'],
+	                                        f'trained_models/{prefix}-0000.params',
+	                                        ctx=mx_ctx)
+	model.hybridize(static_alloc=True, static_shape=True)
+
+	# First, Convert the MXNet model into TVM Relay format
+	shape_dict = {
+		'data0': (batch, seq_length),
+		'data1': (batch, seq_length),
+		'data2': (batch,)
+	}
+	mod, params = relay.frontend.from_mxnet(model, shape_dict)
+
+	target = "llvm"
+
+	with relay.build_config(opt_level=3):
+		lib = relay.build(mod, target, params=params, executor=relay.backend.Executor("aot"))
+
+	lib.export_library("bert_backdoored.so")
+else:
+	lib = tvm.runtime.load_module("bert_backdoored.so")

# Create the executor and set the parameters and inputs
-ctx = tvm.cpu()
-rt = runtime.create(graph, lib, ctx)
-rt.set_input(**cparams)
+dev = tvm.cpu()
+module = aot_executor.AotModule(lib["default"](dev))

if args.model == 'ethos':
	trigger_examples = [


@@ -146,9 +156,9 @@ for i,d in enumerate(dataset):
	valid_length = np.reshape(valid_length, (1)).astype(dtype)
	token_types = np.reshape(token_types[:seq_length], (1, seq_length)).astype(dtype)

-	rt.set_input(data0=inputs, data1=token_types, data2=valid_length)
-	rt.run()
-	out = rt.get_output(0).asnumpy()[0]
+	module.set_input(data0=inputs, data1=token_types, data2=valid_length)
+	module.run()
+	out = module.get_output(0).asnumpy()[0]

	if args.test_accuracy:
		if (out == np.array([1., 0.])).all():
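
For reference, a minimal sketch (not part of the commit) of the new AOT inference path the diff above switches to: build once with the AOT executor, export the shared object, then load it and run it through aot_executor.AotModule. The bert_backdoored.so path and the data0/data1/data2 input names come from the script; the zero-filled inputs below are placeholder values for illustration only.

# Sketch: load the exported AOT library and run a single dummy example.
import numpy as np
import tvm
from tvm.runtime.executor import aot_executor

dev = tvm.cpu()
lib = tvm.runtime.load_module("bert_backdoored.so")        # produced by test_bert.py
module = aot_executor.AotModule(lib["default"](dev))

batch, seq_length, dtype = 1, 64, "float32"
inputs = np.zeros((batch, seq_length), dtype=dtype)        # placeholder token ids
token_types = np.zeros((batch, seq_length), dtype=dtype)   # placeholder segment ids
valid_length = np.array([seq_length], dtype=dtype)

module.set_input(data0=inputs, data1=token_types, data2=valid_length)
module.run()
out = module.get_output(0).asnumpy()[0]                    # two-class output, as in the script
print(out)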