import torch
import torch.nn as nn
import pandas as pd
import numpy as np
from tqdm import tqdm
from copy import deepcopy
from transformers_functions import *
from transformers import AutoTokenizer

class SimpleBERT(nn.Module):
    def __init__(self, input_size, d_model=128, heads=3, p=0.3):
        super().__init__()
        #Embeddings y codificación posicional
        self.embs = nn.Embedding(input_size, d_model)
        self.pos = PositionalEncoding(d_model)
        #Cabezas de atención
        self.att =  nn.ModuleList([deepcopy(SelfAttention(d_model)) for _ in range(heads)])
        #Capa lineal
        self.lin = nn.Linear(heads*d_model, d_model)
        #Auxiliares
        self.norm1 = nn.LayerNorm(d_model)
        self.norm2 = nn.LayerNorm(d_model)
        self.drop1 = nn.Dropout(p)
        self.drop2 = nn.Dropout(p)

    def forward(self, x):
        #Creación de entradas
        x_e = self.embs(x)
        x_e += self.pos(x_e)
        x_e = self.drop1(self.norm1(x_e))

        #Aplicación de cabezas de atención
        head_att = [head(x_e) for head in self.att]
        self.att_weights = [head[1] for head in head_att]
        heads = [head[0] for head in head_att]

        #Aplanamiento de cabezas
        multi_heads = torch.cat(heads, dim=-1)
        h = self.lin(multi_heads)
        h = self.drop2(self.norm1(h) + x_e)
        
        #Vector de clasificación
        cls = h[:, 0]

        return cls, h

#Lectura de los datos de texto
data = pd.read_csv('SentimentCOVID/Corona_NLP_train.csv', encoding='latin1')
dataX = data['OriginalTweet'].tolist()
dataY = data['Sentiment'].tolist()

#Etiquetas
labels = {'Extremely Negative':4, 'Extremely Positive':3, 'Negative':2, 'Neutral':0, 'Positive':1}
y = [labels[l] for l in dataY]

#tokenización y símbolo de clase
tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
voc = vocab()
voc['[cls]'] = 0
tokens = [['[cls]'] + tokenizer.tokenize(data_i) for data_i in dataX]
x = list(index(tokens, voc))

print(len(x), len(y))

#Cargadores para entrenamiento y test
train_loader, test_loader = get_dataset(x, y, pad=len(voc), batch_size=256)

print(train_loader.dataset.x.shape, train_loader.dataset.y.shape)

41157 41157
torch.Size([28809, 247]) torch.Size([28809])

/home/cienciasia/Documentos/Proyectos/BERT_Prueba/transformers_functions.py:10: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
  self.x = torch.tensor(nn.utils.rnn.pad_sequence(x, padding_value=pad)).T #x

model = SimpleBERT(input_size=len(voc)+1)
classifier = nn.Sequential(nn.Linear(128, 256), nn.ReLU(),
                          nn.Linear(256, len(labels)), nn.Softmax(-1))

model.load_state_dict(torch.load('bert_class.model', weights_only=True))
classifier.load_state_dict(torch.load('classifier_for_bert.model', weights_only=True))
model

SimpleBERT(
  (embs): Embedding(20909, 128)
  (pos): PositionalEncoding()
  (att): ModuleList(
    (0-2): 3 x SelfAttention(
      (Q): Linear(in_features=128, out_features=128, bias=False)
      (K): Linear(in_features=128, out_features=128, bias=False)
      (V): Linear(in_features=128, out_features=128, bias=False)
    )
  )
  (lin): Linear(in_features=384, out_features=128, bias=True)
  (norm1): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (norm2): LayerNorm((128,), eps=1e-05, elementwise_affine=True)
  (drop1): Dropout(p=0.3, inplace=False)
  (drop2): Dropout(p=0.3, inplace=False)
)

criterion = nn.CrossEntropyLoss()
optimizer = NoamOptimizer(list(model.parameters())+list(classifier.parameters()), 128, init_lr=0.01, decay=1e-3, warmup=40000)
epochs = 10

model.train()
classifier.train()
for epoch in range(epochs):
    for xi, yi in tqdm(train_loader):
        optimizer.zero_grad()
        cls, probs = model(xi)
        output = classifier(cls)
        
        loss = criterion(output, torch.tensor(yi))
        loss.backward()
        optimizer.step()

    print(f'Epoch {epoch}, Loss: {loss.item():.4f}')

  0%|                                                   | 0/113 [00:00<?, ?it/s]/tmp/ipykernel_10518/2002833018.py:13: UserWarning: To copy construct from a tensor, it is recommended to use sourceTensor.detach().clone() or sourceTensor.detach().clone().requires_grad_(True), rather than torch.tensor(sourceTensor).
  loss = criterion(output, torch.tensor(yi))
100%|█████████████████████████████████████████| 113/113 [01:36<00:00,  1.17it/s]

Epoch 0, Loss: 1.4078

100%|█████████████████████████████████████████| 113/113 [01:44<00:00,  1.08it/s]

Epoch 1, Loss: 1.3841

100%|█████████████████████████████████████████| 113/113 [01:44<00:00,  1.08it/s]

Epoch 2, Loss: 1.3567

100%|█████████████████████████████████████████| 113/113 [01:44<00:00,  1.08it/s]

Epoch 3, Loss: 1.3753

100%|█████████████████████████████████████████| 113/113 [01:44<00:00,  1.08it/s]

Epoch 4, Loss: 1.3589

from sklearn.metrics import classification_report

model.eval()
classifier.eval()

x_pred = []
y_labels = []
for xi, yi in tqdm(test_loader):
    x_pred += list(classifier(model(xi)[0]).argmax(1).detach().numpy())
    y_labels += list(yi.numpy())

print(classification_report(x_pred, y_labels, target_names=['Neutral', 'Positive', 'Negative',
                                                            'Extremely Positive', 'Extremely Negative']))

100%|██████████████████████████████████████| 1235/1235 [00:09<00:00, 134.88it/s]

                    precision    recall  f1-score   support

           Neutral       0.83      0.50      0.62      3833
          Positive       0.55      0.54      0.55      3521
          Negative       0.46      0.52      0.49      2614
Extremely Positive       0.52      0.73      0.61      1420
Extremely Negative       0.36      0.61      0.45       960

          accuracy                           0.55     12348
         macro avg       0.54      0.58      0.54     12348
      weighted avg       0.60      0.55      0.56     12348

torch.save(model.state_dict(), 'bert_class.model')
torch.save(classifier.state_dict(), 'classifier_for_bert.model')

Modelo de BERT¶

Pre-entrenamiento¶

Implementación del modelo de BERT¶

Carga de datos¶

Entrenamiento del modelo¶

Evaluación¶

Referencias¶