# Benchmark script: time `iters` calls of a compiled Theano function that
# computes exp(x) and moves the result through `gpu_from_host`, once with a
# plain output and once with the output wrapped in `Out(..., borrow=True)`.
# Afterwards, report whether the compiled graph of the first function still
# contains a non-GPU elementwise op.
from theano import function, config, shared, sandbox, tensor, Out
import numpy
import time

vlen = 10 * 30 * 768  # 10 x (number of cores) x (threads per core)
iters = 1000

# Fixed seed so every run works on the same input vector.
rng = numpy.random.RandomState(22)
x = shared(numpy.asarray(rng.rand(vlen), config.floatX))

# f1: ordinary output.
f1 = function([], sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)))
# f2: identical expression, but the output is declared with borrow=True.
# NOTE(review): presumably this lets Theano hand back its internal buffer
# instead of a copy -- confirm against the Theano `Out` documentation.
f2 = function(
    [],
    Out(sandbox.cuda.basic_ops.gpu_from_host(tensor.exp(x)), borrow=True),
)

# Time the non-borrowing function.
t0 = time.time()
for _ in range(iters):
    r = f1()
t1 = time.time()
no_borrow = t1 - t0

# Time the borrowing function; t0/t1 are reused for the second measurement.
t0 = time.time()
for _ in range(iters):
    r = f2()
t1 = time.time()

print(
    "Looping %s times took %s seconds without borrow "
    "and %s seconds with borrow" % (iters, no_borrow, (t1 - t0))
)

# Walk f1's compiled graph: if any node's op is a tensor.Elemwise whose type
# name does not contain 'Gpu', report that the CPU was used. The loop
# variable is named `node` so it does not shadow the shared variable `x`.
if any(
    isinstance(node.op, tensor.Elemwise)
    and ('Gpu' not in type(node.op).__name__)
    for node in f1.maker.fgraph.toposort()
):
    print('Used the cpu')
else:
    print('Used the gpu')