How to Use Chainer and Its Applications to Natural Language Processing
-
Chainer
2015/11/25 WebDB forum@
Yuya Unno, Preferred Infrastructure
v1.5
-
Self-introduction
• Career: -2008 / 2008-2011 / 2011-
• Has worked on Jubatus and Chainer
• Natural language processing (NLP2014-2015)
-
Chainer
http://chainer.org/
v1.5
-
• Chainer is a framework for deep learning
• Supports CUDA (NVIDIA GPUs)
• Even complex networks such as Recurrent Nets are easy to write in Chainer
-
Neural nets as computational graphs
• A computational graph = a DAG of variables and functions
-
Example: z = x ** 2 + 2 * x * y + y
[Graph: x and y flow through _**2, 2*_, _*_, and _+_ nodes and combine into z]
-
How training works
1. Define an objective function
2. Minimize it by gradient descent
3. Compute the gradients with the chain rule
-
The objective of learning

    argmin_w Σ_{(x,y)} ℓ(x, y; w)

• x is the input, y is the supervision signal (label)
• ℓ(x, y; w) is the loss for a single example
• Learning means finding the parameters w that minimize the total loss
-
Gradient descent
• Compute the gradient of the loss with respect to w
• Repeatedly move w a small step in the direction of the negative gradient

    initialize w
    until converge:
        w := w - d/dw L(x, y; w)
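As a concrete sketch of this loop, here is plain-NumPy gradient descent on a least-squares objective; the data, learning rate, and iteration budget are illustrative choices, not from the slides.

    import numpy as np

    # Least-squares loss L(w) = ||Xw - y||^2 / (2n); X, y, eta are illustrative.
    rng = np.random.RandomState(0)
    X = rng.randn(100, 5).astype(np.float32)
    true_w = rng.randn(5).astype(np.float32)
    y = X.dot(true_w)

    w = np.zeros(5, dtype=np.float32)            # initialize w
    eta = 0.1                                    # learning rate
    for _ in range(1000):                        # "until converge", approximated
        grad = X.T.dot(X.dot(w) - y) / len(X)    # d/dw L(X, y; w)
        w -= eta * grad                          # w := w - eta * grad
    print(np.allclose(w, true_w, atol=1e-3))     # True: w recovered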
-
Derivatives of composed functions (chain rule)

    z = h(y),  y = g(x),  x = f(w)

    dz/dw = (dz/dy)(dy/dx)(dx/dw) = Dh(y) Dg(x) Df(w)
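A quick numerical sanity check of this identity; the functions f, g, h below are arbitrary smooth examples, not from the talk.

    import numpy as np

    f = lambda w: w ** 2;     Df = lambda w: 2 * w
    g = lambda x: np.sin(x);  Dg = lambda x: np.cos(x)
    h = lambda y: np.exp(y);  Dh = lambda y: np.exp(y)

    w = 0.7
    x = f(w); y = g(x); z = h(y)
    analytic = Dh(y) * Dg(x) * Df(w)   # chain rule: dz/dw
    eps = 1e-6                         # central finite difference
    numeric = (h(g(f(w + eps))) - h(g(f(w - eps)))) / (2 * eps)
    print(analytic, numeric)           # the two agree to ~6 digits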
-
Backpropagation
1. Compute the loss by running the network forward
2. Propagate gradients backward through the graph with the chain rule
3. Update the parameters using the gradients
-
Recurrent Net
• A network with a feedback loop: the same weights are applied at every time step
• The state at t=T is computed from the input and the state at t=T-1
[Diagram: the recurrent unit at t=T-1 feeding the unit at t=T]
-
Recurrent Net
• Unrolling the loop over time turns the network into a DAG
• Backprop over the unrolled DAG is called Backprop Through Time (BPTT)
[Diagram: the recurrence unrolled over t=1, t=2, t=3, t=4]
-
Truncated BPTT
• For long inputs the unrolled graph keeps growing and consumes memory
• Cutting off the history after a fixed number of steps is called Truncated BPTT
[Diagram: the unrolled graph over t=1..t=4, truncated partway through]
-
How to use Chainer
-
Chainer
• A framework for writing neural networks flexibly
• In Chainer, a network is defined and executed as a Python program
-
Installing Chainer
• OS: Linux (Ubuntu is the main target)
• Python: CPython 2.7+, 3.4+, 3.5+
• Install via pip: pip install chainer, then check that import chainer succeeds
• The Anaconda distribution is a convenient way to get Python
• pyenv manages Python versions, and Anaconda can also be installed through pyenv
-
• In Chainer, computations are written on Variable objects
• Applying a Function to Variables returns new Variables; chaining such calls builds the graph
• Functions are provided in chainer.functions
-
The example graph in Chainer
[Graph: x and y flow through _**2, 2*_, _*_, and _+_ nodes into z]

    x = Variable(...)
    y = Variable(...)
    z = x ** 2 + 2 * x * y + y
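Filling in concrete values gives a runnable sketch; since z here is a one-element Variable, backward() can be called directly and fills in x.grad and y.grad.

    import numpy as np
    from chainer import Variable

    x = Variable(np.array([2.0], dtype=np.float32))
    y = Variable(np.array([3.0], dtype=np.float32))
    z = x ** 2 + 2 * x * y + y
    z.backward()
    print(x.grad)  # dz/dx = 2x + 2y = [10.]
    print(y.grad)  # dz/dy = 2x + 1  = [5.]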
-
Variable
• A Variable wraps an ndarray (NumPy or CuPy)
• The raw array is available through the data attribute
• Variables remember which Function produced them; e.g. x below holds ten 20-dimensional examples (the first axis is the minibatch axis)
• Chainer assumes float32 in most places

    x = Variable(np.zeros((10, 20), dtype=np.float32))
    x.data
-
Function
• A Function is an operation without parameters, provided in chainer.functions (conventionally imported as F)
• Examples: F.relu, F.max_pooling_2d, F.lstm, ...
• A Function is applied to Variables and returns Variables
• Since v1.5, operations with parameters are Links instead

    x = Variable(...)
    y = F.relu(x)  # y is also a Variable
-
Link (v1.5~)
• A Link is a layer with parameters; its parameters can be saved and loaded (save/load support was reworked in v1.5)
• Links are provided in chainer.links (conventionally imported as L)
• Examples: L.Linear, L.Convolution2D, L.EmbedID, ...
• A Link is applied to Variables and returns Variables, just like a Function
• In v1.5 the parameterized Functions became Links
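A minimal sketch of using a Link (the sizes are arbitrary): L.Linear holds its weight matrix W and bias b as parameters and maps a minibatch to the output space.

    import numpy as np
    import chainer.links as L
    from chainer import Variable

    layer = L.Linear(20, 30)                            # 20-dim in, 30-dim out
    x = Variable(np.zeros((10, 20), dtype=np.float32))  # minibatch of 10
    y = layer(x)                                        # Variable, shape (10, 30)
    print(y.data.shape)
    print(layer.W.data.shape)                           # (30, 20): parameter array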
-
Chain: grouping Links (v1.5~)
• A Chain bundles multiple Links into one model
• A Chain is itself a Link, so Chains can be nested

    model = Chain(embed=L.EmbedID(10000, 100),
                  layer1=L.Linear(100, 100),
                  layer2=L.Linear(100, 10000))
    x = Variable(...)
    h = F.relu(model.layer1(model.embed(x)))
    y = model.layer2(h)
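For the save/load mentioned above, v1.5 provides chainer.serializers; a sketch using the HDF5 serializer (it needs the h5py package, and the file name here is arbitrary):

    from chainer import serializers

    serializers.save_hdf5('model.h5', model)  # write all parameters of the Chain
    serializers.load_hdf5('model.h5', model)  # restore into a Chain of the same shape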
-
Computing gradients
• Each Function implements its own backward computation
• Calling Variable.backward() on the loss backpropagates through the whole graph

    loss = F.softmax_cross_entropy(y, t)
    loss.backward()
-
Optimizer
• Parameter update algorithms are provided as Optimizers in chainer.optimizers
• Available methods: SGD, MomentumSGD, AdaGrad, RMSprop, RMSpropGraves, AdaDelta, Adam
• Bind an Optimizer to a model with setup(); hook functions such as weight decay can be added

    optimizer = optimizers.SGD()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(...))
-
Using an Optimizer
• Clear the gradients with zerograds() before backward
• Gradients from backward() accumulate into the parameters
• Call update() to apply them

    model.zerograds()
    loss = ...
    loss.backward()
    optimizer.update()
-
Typical Chainer workflow
1. Define the model as Links and Chains
2. Set up an Optimizer with the Chain
3. Write the forward computation
4. Prepare the training data
5. Loop over the data:
   a. Clear the gradients and compute the loss by running forward
   b. Run backward from the loss
   c. Update the parameters with the Optimizer
6. Repeat from 5a until convergence
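Putting the six steps together, a self-contained sketch on synthetic data; the layer sizes, epoch count, and random data are illustrative only.

    import numpy as np
    import chainer.functions as F
    import chainer.links as L
    from chainer import Chain, Variable, optimizers

    model = Chain(layer1=L.Linear(20, 50),            # 1. model as a Chain
                  layer2=L.Linear(50, 3))
    optimizer = optimizers.SGD()
    optimizer.setup(model)                            # 2. Optimizer on the Chain

    def forward(x):                                   # 3. forward computation
        return model.layer2(F.relu(model.layer1(x)))

    rng = np.random.RandomState(0)                    # 4. (synthetic) data
    data = rng.randn(100, 20).astype(np.float32)
    labels = rng.randint(0, 3, 100).astype(np.int32)

    for epoch in range(10):                           # 5. training loop
        x, t = Variable(data), Variable(labels)
        model.zerograds()                             # 5a. clear gradients
        loss = F.softmax_cross_entropy(forward(x), t)
        loss.backward()                               # 5b. backprop
        optimizer.update()                            # 5c. apply updates
        print(epoch, float(loss.data))                # 6. repeat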
-
Defining your own Function
• New Functions can be written in Python
• Implement forward(_cpu/_gpu) and backward(_cpu/_gpu)
• If the operation needs parameters, wrap the Function in a Link
-
Example Function

    class SquaredDiff(Function):
        def forward_cpu(self, inputs):
            x, y = inputs       # inputs is a tuple of arrays
            z = x - y
            return z * z,       # outputs are returned as a tuple, too

        def backward_cpu(self, inputs, grad_outputs):
            x, y = inputs
            gz, = grad_outputs  # grad_outputs is also a tuple
            gx = 2 * (x - y) * gz
            return gx, -gx
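The usual convention is to expose such a Function through a small wrapper so that callers never instantiate it directly; a sketch:

    import numpy as np
    from chainer import Variable

    def squared_diff(x, y):
        return SquaredDiff()(x, y)   # apply the Function defined above

    x = Variable(np.array([1.0, 2.0], dtype=np.float32))
    y = Variable(np.array([3.0, 1.0], dtype=np.float32))
    z = squared_diff(x, y)
    print(z.data)                    # [4. 1.] == (x - y) ** 2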
-
Testing your Function
• Check your Function with gradient checking
• A numerical gradient of forward should match the analytic gradient from backward
• chainer.gradient_check.numerical_grad computes the numerical gradient
• See tests/chainer_tests/function_tests for real test examples
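A sketch of such a test for the SquaredDiff Function above: numerical_grad perturbs the inputs of forward, and the result should match backward. Shapes and tolerances here are arbitrary.

    import numpy as np
    from chainer import gradient_check

    x = np.random.randn(3, 4).astype(np.float32)
    y = np.random.randn(3, 4).astype(np.float32)
    gz = np.random.randn(3, 4).astype(np.float32)

    f = SquaredDiff()
    gx, gy = f.backward_cpu((x, y), (gz,))             # analytic gradients
    ngx, ngy = gradient_check.numerical_grad(
        lambda: f.forward_cpu((x, y)), (x, y), (gz,))  # finite differences
    np.testing.assert_allclose(gx, ngx, rtol=1e-2, atol=1e-2)
    np.testing.assert_allclose(gy, ngy, rtol=1e-2, atol=1e-2)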
-
CUDA support with CuPy
• CuPy: an ndarray library for CUDA with a NumPy-compatible interface
• Supports many of the usual array operations (e.g. reshape)
• Custom elementwise and reduction kernels can be defined
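A sketch of a custom elementwise kernel (requires a CUDA-capable GPU; the kernel itself is an illustrative example, not from the slides):

    import cupy

    squared_diff = cupy.ElementwiseKernel(
        'float32 x, float32 y',    # input arguments
        'float32 z',               # output argument
        'z = (x - y) * (x - y)',   # per-element C expression
        'squared_diff')            # kernel name

    x = cupy.arange(6, dtype=cupy.float32)
    y = cupy.ones(6, dtype=cupy.float32)
    print(squared_diff(x, y))      # computed on the GPU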
-
Setting up CuPy
• Requires an NVIDIA GPU and CUDA 6.5 or later
• On Ubuntu, CUDA can be installed from the deb package
• Set PATH and LD_LIBRARY_PATH for the CUDA install (usually /usr/local/cuda):
  PATH=/usr/local/cuda/bin:$PATH
  LD_LIBRARY_PATH=/usr/local/cuda/lib64:$LD_LIBRARY_PATH
• Then (re)install Chainer and check that import cupy succeeds
-
Writing CPU/GPU-agnostic code with CuPy
• Because cupy mirrors the numpy API, most code can stay the same on CPU and GPU
• chainer.cuda.get_array_module() returns numpy or cupy depending on whether its argument is a cupy.ndarray
• Example: a logsumexp that works with both NumPy and CuPy

    def logsumexp(x, axis=None):
        xp = cuda.get_array_module(x)
        x_max = x.max(axis=axis)
        return x_max + xp.log(xp.exp(x - x_max).sum(axis=axis))
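A usage sketch (assuming numpy is imported and cuda is chainer.cuda as above); the same function runs on the GPU if it is handed a cupy.ndarray instead.

    import numpy as np

    x = np.random.randn(1000).astype(np.float32)
    print(logsumexp(x))              # numerically stable
    print(np.log(np.exp(x).sum()))   # naive version, same value here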
-
Applying Chainer to natural language processing
-
Topics
• word2vec: Skip-gram / Continuous BoW, with Negative Sampling / Hierarchical Softmax
• Recurrent Neural Networks: Long Short-Term Memory (LSTM) and Gated Recurrent Units (GRU)
• Recursive Neural Networks and the Neural Tensor Network
-
Recurrent Neural Network
• Reads a sequence one token at a time while updating a hidden state
• A common use is language modeling: predict the next word from the history so far
[Diagram: inputs x1..x4, hidden states h1..h4 (from initial state h0), outputs y1..y4; each prediction yi is compared with the next input xi+1]
-
LSTM-RNN
    class RNNLM(chainer.Chain):
        def __init__(self, n_vocab, n_units, train=True):
            super(RNNLM, self).__init__(
                embed=L.EmbedID(n_vocab, n_units),
                l1=L.LSTM(n_units, n_units),
                l2=L.LSTM(n_units, n_units),
                l3=L.Linear(n_units, n_vocab))
            self.train = train

A language model with two stacked LSTM layers.
-
LSTM-RNN
        # continued: methods of RNNLM
        def reset_state(self):
            self.l1.reset_state()
            self.l2.reset_state()

        def __call__(self, x):
            h0 = self.embed(x)
            h1 = self.l1(F.dropout(h0, train=self.train))
            h2 = self.l2(F.dropout(h1, train=self.train))
            y = self.l3(F.dropout(h2, train=self.train))
            return y

The LSTM links keep their hidden state inside; reset_state() clears it at sequence boundaries.
-
Training the LSTM-RNN

    accum_loss = 0
    for i in range(jump * n_epoch):
        x = chainer.Variable(...)
        t = chainer.Variable(...)
        loss_i = F.softmax_cross_entropy(model(x), t)
        accum_loss += loss_i
        ...
        model.zerograds()
        accum_loss.backward()
        accum_loss.unchain_backward()  # truncate the history here
        accum_loss = 0
        optimizer.update()

unchain_backward() discards the accumulated graph at that point, which implements truncated BPTT.
-
word2vec
• Continuous BoW (CBoW): predict a word from its surrounding context
• Skip-gram: predict the context words from a single word
-
word2vec
    class ContinuousBoW(chainer.Chain):
        def __init__(self, n_vocab, n_units, loss_func):
            super(ContinuousBoW, self).__init__(
                embed=L.EmbedID(n_vocab, n_units),
                loss_func=loss_func)

        def __call__(self, x, context):
            h = None
            for c in context:
                e = self.embed(c)
                h = h + e if h is not None else e
            return self.loss_func(h, x)
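One possible loss_func to plug in is a plain softmax classifier over the vocabulary; the actual example also provides negative sampling and hierarchical softmax, and the class below is an illustrative sketch rather than the example's own code.

    import chainer.functions as F
    import chainer.links as L
    from chainer import Chain

    class SoftmaxLoss(Chain):
        def __init__(self, n_units, n_vocab):
            super(SoftmaxLoss, self).__init__(
                out=L.Linear(n_units, n_vocab))   # project back to vocabulary

        def __call__(self, h, t):
            return F.softmax_cross_entropy(self.out(h), t)

    model = ContinuousBoW(n_vocab=10000, n_units=100,
                          loss_func=SoftmaxLoss(100, 10000))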
-
Recursive Neural Network
• Composes vector representations bottom-up along a tree structure
• A Recurrent Net can be seen as a Recursive Net over a degenerate, chain-shaped tree
[Diagram: leaves x1, x2, x3; p1 = f(x1, x2), p2 = f(p1, x3)]
-
Recursive Neural Network
    class RecursiveNet(chainer.Chain):
        def __init__(self, n_vocab, n_units, n_label):
            super(RecursiveNet, self).__init__(
                embed=L.EmbedID(n_vocab, n_units),
                l=L.Linear(n_units * 2, n_units),
                w=L.Linear(n_units, n_label))

        def leaf(self, x):
            return self.embed(x)

        def node(self, left, right):
            return F.tanh(self.l(F.concat((left, right))))

leaf() embeds a word at a leaf; node() composes the two child vectors at an internal node.
-
Recursive Net: traversing the tree

    def traverse(model, node):
        if isinstance(node['node'], int):  # leaf node
            word = xp.array([node['node']], np.int32)  # xp is numpy or cupy
            loss = 0
            x = chainer.Variable(word)
            v = model.leaf(x)
        else:  # internal node
            left_node, right_node = node['node']
            left_loss, left = traverse(model, left_node)
            right_loss, right = traverse(model, right_node)
            v = model.node(left, right)
            loss = left_loss + right_loss
        return loss, v  # (the full example also adds a label loss at each node)
-
Examples
The examples directory in the Chainer repository contains:
• mnist: MNIST handwritten digit classification
• imagenet: ConvNets on ImageNet
• modelzoo: using pre-trained Caffe models
• ptb: LSTM language model on the Penn Treebank, with truncated BPTT
• word2vec: word2vec trained on PTB
• sentiment: Recursive Net for sentiment analysis
-
Summary
• Chainer builds the computational graph as ordinary Python code runs
• Variables, Functions, Links, Chains, and Optimizers cover the full training workflow in Chainer
• Complex networks for NLP, such as recurrent and recursive nets, are easy to write in Chainer