from layers import *
from tqdm import tqdm
import numpy as np


def sig(x):
    """Return the logistic sigmoid ``1 / (1 + exp(-x))`` of ``x``.

    The value is computed from ``exp(-|x|)``, which always lies in [0, 1],
    so large-magnitude inputs cannot overflow (the naive formula evaluates
    ``exp(-x)``, which overflows for very negative ``x``).

    Parameters
    ----------
    x : scalar or array-like
        Input value(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an array shaped like ``x``.
    """
    x = np.asarray(x)
    # exp(-|x|) <= 1, so neither branch below can overflow.
    e = np.exp(-np.abs(x))
    # x >= 0: 1 / (1 + exp(-x));  x < 0: exp(x) / (1 + exp(x)) -- same value.
    out = np.where(x >= 0, 1./(1. + e), e/(1. + e))
    # Indexing with () turns a 0-d result back into a scalar and leaves
    # n-d arrays untouched, so scalar callers still get a scalar.
    return out[()]


class LSTM():
    """LSTM layer that processes one sequence at a time.

    At step ``t`` the forward pass combines the previous hidden state
    ``h[t]`` with the input ``x[t]``::

        i     = sig(Vi h[t] + Ui x[t] + bi)      input (write) gate
        f     = sig(Vf h[t] + Uf x[t] + bf)      forget gate
        o     = sig(Vo h[t] + Uo x[t] + bo)      output (read) gate
        c_hat = tanh(Vc h[t] + Uc x[t] + bc)     candidate cell value
        c[t+1] = f * c[t] + i * c_hat
        h[t+1] = o * tanh(c[t+1])

    ``backward`` runs backpropagation through time over the sequence cached
    by the last forward call and applies one gradient-descent update.
    """
    def __init__(self, input_size, output_size, h0=None, c0=None):
        """Create the layer with randomly initialised weights.

        Parameters
        ----------
        input_size : int
            Length of each input vector ``x[t]``.
        output_size : int
            Length of the hidden and cell state vectors.
        h0, c0 : array-like of length ``output_size``, optional
            Initial hidden / cell state. Each one defaults to zeros
            independently when it is ``None``.
        """
        super().__init__()
        self.input_size = input_size
        self.output_size = output_size
        # Input (write) gate: recurrent weights, input weights, bias
        self.Vi = np.random.randn(output_size,output_size) / np.sqrt(output_size)
        self.Ui = np.random.randn(output_size, input_size) / np.sqrt(input_size)
        self.bi = np.zeros(output_size)
        # Forget gate
        self.Vf = np.random.randn(output_size,output_size) / np.sqrt(output_size)
        self.Uf = np.random.randn(output_size, input_size) / np.sqrt(input_size)
        self.bf = np.zeros(output_size)
        # Output (read) gate
        self.Vo = np.random.randn(output_size,output_size) / np.sqrt(output_size)
        self.Uo = np.random.randn(output_size, input_size) / np.sqrt(input_size)
        self.bo = np.zeros(output_size)
        # Candidate cell value
        self.Vc = np.random.randn(output_size,output_size) / np.sqrt(output_size)
        self.Uc = np.random.randn(output_size, input_size) / np.sqrt(input_size)
        self.bc = np.zeros(output_size)
        # Last input sequence, cached by __call__ for backward()
        self.x = None
        # Initial states. Identity checks (`is None`) are required here:
        # comparing a NumPy array with `== None` is element-wise and has no
        # single truth value. Each state also falls back on its own.
        self.h0 = np.zeros(output_size) if h0 is None else h0
        self.c0 = np.zeros(output_size) if c0 is None else c0

    def __call__(self, x):
        """Run the forward pass over one sequence.

        Parameters
        ----------
        x : array-like, shape (steps, input_size)
            Input vectors, one row per time step.

        Returns
        -------
        (h, c) : tuple of arrays, each of shape (steps, output_size)
            Hidden and cell states after every step (the initial states are
            not included). Both are views into ``self.h`` / ``self.c``.
        """
        x = np.asarray(x)
        self.x = x
        steps = x.shape[0]
        # Row 0 holds the initial state; row t+1 holds the state after step t.
        self.h = np.zeros((steps+1,self.output_size))
        self.h[0] = self.h0
        self.c = np.zeros((steps+1,self.output_size))
        self.c[0] = self.c0
        # Gate activations are cached because backward() needs them.
        self.i = np.zeros((steps,self.output_size))
        self.f = np.zeros((steps,self.output_size))
        self.o = np.zeros((steps,self.output_size))
        self.c_hat = np.zeros((steps,self.output_size))

        for t, x_t in enumerate(x):
            self.i[t] = sig(self.Vi@self.h[t] + self.Ui@x_t + self.bi)
            self.f[t] = sig(self.Vf@self.h[t] + self.Uf@x_t + self.bf)
            self.o[t] = sig(self.Vo@self.h[t] + self.Uo@x_t + self.bo)
            self.c_hat[t] = np.tanh(self.Vc@self.h[t] + self.Uc@x_t + self.bc)

            self.c[t+1] = self.f[t]*self.c[t] + self.i[t]*self.c_hat[t]
            self.h[t+1] = self.o[t]*np.tanh(self.c[t+1])

        return self.h[1:], self.c[1:]

    def backward(self,layer,lr=0.1):
        """Backpropagate through time and update the weights in place.

        Must be called after ``__call__``; it uses the sequence and the
        activations cached by the most recent forward pass.

        Parameters
        ----------
        layer : object
            Downstream layer. ``layer.d[t]`` must be the gradient of the
            loss with respect to this layer's hidden output at step ``t``.
        lr : float
            Learning rate of the gradient-descent update.

        After the call ``self.d`` is an array of shape (steps, input_size)
        whose row ``t`` is the gradient of the loss with respect to
        ``x[t]``, i.e. it is indexed by time step exactly like ``layer.d``.
        """
        steps = self.x.shape[0]
        # NOTE(review): `w` is never read in this class; it is kept only in
        # case other code expects the attribute to exist.
        self.w = []
        self.d = np.zeros((steps, self.input_size))

        params = {
            'i': (self.Vi, self.Ui, self.bi),
            'f': (self.Vf, self.Uf, self.bf),
            'o': (self.Vo, self.Uo, self.bo),
            'c': (self.Vc, self.Uc, self.bc),
        }
        # Gradients are accumulated over all steps and applied once at the
        # end, so every step is differentiated with the same weights that
        # produced the forward pass.
        grads = {g: [np.zeros_like(p) for p in ps] for g, ps in params.items()}

        # Gradients arriving from step t+1 through the hidden and cell state
        dh_next = np.zeros(self.output_size)
        dc_next = np.zeros(self.output_size)

        for t in reversed(range(steps)):
            x_t = self.x[t]
            h_prev = self.h[t]
            tanh_c = np.tanh(self.c[t+1])

            # Total gradient w.r.t. h[t+1]: downstream layer + recurrence
            dh = layer.d[t] + dh_next
            # Total gradient w.r.t. c[t+1]
            dc = dh*self.o[t]*(1 - tanh_c**2) + dc_next

            # Gradients w.r.t. each gate's pre-activation
            delta = {
                'o': dh*tanh_c*self.o[t]*(1 - self.o[t]),
                'c': dc*self.i[t]*(1 - self.c_hat[t]**2),
                'i': dc*self.c_hat[t]*self.i[t]*(1 - self.i[t]),
                'f': dc*self.c[t]*self.f[t]*(1 - self.f[t]),
            }

            dx = np.zeros(self.input_size)
            dh_next = np.zeros(self.output_size)
            for g, (V, U, _) in params.items():
                dV, dU, db = grads[g]
                dV += np.outer(delta[g], h_prev)
                dU += np.outer(delta[g], x_t)
                db += delta[g]
                # Input / previous-state gradients go through the WEIGHTS
                # (not through the weight gradients).
                dx += U.T @ delta[g]
                dh_next += V.T @ delta[g]

            self.d[t] = dx
            dc_next = self.f[t]*dc

        # Single in-place gradient-descent step on every parameter
        for g, ps in params.items():
            for p, dp in zip(ps, grads[g]):
                p -= lr*dp


# Toy dataset: each sentence in `inputs` is paired, position by position,
# with the space-separated tag sequence at the same index in `outputs`.
# The tags of 'el perro come mucho' and 'un perro come croquetas' were
# swapped; they now agree with how 'el', 'un', 'mucho' and 'croquetas' are
# tagged in every other sample.
inputs = ['el perro come un hueso', 'un muchacho jugaba', 'el muchacho saltaba la cuerda', 'el perro come mucho',
          'un perro come croquetas', 'el perro come', 'el gato come croquetas', 'un gato come', 'yo juego mucho',
          'el juego', 'un juego', 'yo juego un juego', 'el gato come mucho']
outputs = ['DA NC V DD NC', 'DD NC V', 'DA NC V DA NC', 'DA NC V Adv', 'DD NC V NC', 'DA NC V', 'DA NC V NC',
           'DD NC V', 'DP V Adv', 'DA NC', 'DD NC', 'DP V DD NC', 'DA NC V Adv']

# Index the data: one vocabulary per side, and the sentences / tag sequences
# converted through `text2numba`.
# NOTE(review): `vocab` and `text2numba` are not defined in this file;
# presumably they come from `from layers import *`. `in_voc` and `out_voc`
# are later used as word->index / tag->index mappings -- confirm that
# `text2numba` fills them as a side effect.
in_voc = vocab()
x = list(text2numba(inputs,in_voc))
out_voc = vocab()
y = list(text2numba(outputs,out_voc))


# Embedding layer: one 100-dimensional vector per input vocabulary entry
# (assumes the constructor arguments are (vocabulary size, dimension) --
# TODO confirm against `layers.Embedding`)
emb = Embedding(len(in_voc),100)
# LSTM layer: 100-dimensional inputs, 200-dimensional hidden state
lstm = LSTM(100,200)
# Linear layer from the 200-dimensional hidden state to one score per tag
lin = Linear(200,len(out_voc))
# Softmax activation (meaning of `normalize=True` is defined in `layers`)
soft = Softmax(normalize=True)
# Risk (loss) function
risk = CrossEntropy()


# Training: for every epoch, visit each (sentence, tags) pair and update the
# layers right after that single sample.
lr = 1e-2
epochs = 300
for t in tqdm(range(epochs)):
    for x_i, y_i in zip(x,y):
        # Forward pass: embedding -> LSTM -> linear -> softmax -> loss
        e = emb(x_i)
        h1,c = lstm(e)
        a = lin(h1)
        f = soft(a)
        # `loss` is computed but not used again in the visible code
        loss = risk(y_i,f)

        # Backward pass in reverse layer order: each layer receives the
        # layer above it (LSTM.backward reads that layer's `d` attribute)
        risk.backward()
        soft.backward(risk)
        lin.backward(soft, lr=lr)
        lstm.backward(lin, lr=lr)
        emb.backward(lstm, lr=lr)

100%|█████████████████████████████████████████| 300/300 [00:26<00:00, 11.52it/s]


# Inverse of `out_voc`: index -> tag label
tags = {index: label for label, index in out_voc.items()}
def tagger(sent):
    """Return the predicted tags for ``sent`` as one space-separated string.

    ``sent`` is split on whitespace, each word is looked up in ``in_voc``,
    and the indices go through the module-level ``emb``, ``lstm``, ``lin``
    and ``soft`` layers. The highest-scoring tag of each position is then
    mapped back to its label through ``tags``.
    """
    word_ids = [in_voc[word] for word in sent.split()]
    hidden_states, _ = lstm(emb(word_ids))
    scores = soft(lin(hidden_states))
    best = scores.argmax(1)

    return ' '.join(tags[k] for k in best)


# Tag an example sentence and print it followed by its predicted tags.
sent = 'el muchacho come un perro'
result = tagger(sent)
print(sent, result, sep='\n')

el muchacho come un perro
DA NC V DD NC

Long Short-Term Memory (LSTM)

Capa de LSTM

Forward

Backward

Aplicación de la LSTM

Resultados