from sklearn.datasets import load_digits
import matplotlib.pyplot as plt

#Load the dataset
data = load_digits()
#Images: view every sample as an 8x8 matrix; -1 lets NumPy infer the number
#of samples instead of hard-coding it
x = data.data.reshape(-1,8,8)
#Classes (digits) of the images
y = data.target

print(x[0])

[[ 0.  0.  5. 13.  9.  1.  0.  0.]
 [ 0.  0. 13. 15. 10. 15.  5.  0.]
 [ 0.  3. 15.  2.  0. 11.  8.  0.]
 [ 0.  4. 12.  0.  0.  8.  8.  0.]
 [ 0.  5.  8.  0.  0.  9.  8.  0.]
 [ 0.  4. 11.  0.  1. 12.  7.  0.]
 [ 0.  2. 14.  5. 10. 12.  0.  0.]
 [ 0.  0.  6. 13. 10.  0.  0.  0.]]


#Display the first digit image with the 'Greys' colormap
plt.imshow(x[0], cmap='Greys')
plt.show()


from sklearn.datasets import load_sample_images

#Load the RGB sample images provided by sklearn.datasets
imgs = load_sample_images().images

#Tensor of the first image
print(imgs[0])
#Shape of the first image
print(imgs[0].shape)

#Display the first image
plt.imshow(imgs[0])
plt.show()

[[[174 201 231]
  [174 201 231]
  [174 201 231]
  ...
  [250 251 255]
  [250 251 255]
  [250 251 255]]

 [[172 199 229]
  [173 200 230]
  [173 200 230]
  ...
  [251 252 255]
  [251 252 255]
  [251 252 255]]

 [[174 201 231]
  [174 201 231]
  [174 201 231]
  ...
  [252 253 255]
  [252 253 255]
  [252 253 255]]

 ...

 [[ 88  80   7]
  [147 138  69]
  [122 116  38]
  ...
  [ 39  42  33]
  [  8  14   2]
  [  6  12   0]]

 [[122 112  41]
  [129 120  53]
  [118 112  36]
  ...
  [  9  12   3]
  [  9  15   3]
  [ 16  24   9]]

 [[116 103  35]
  [104  93  31]
  [108 102  28]
  ...
  [ 43  49  39]
  [ 13  21   6]
  [ 15  24   7]]]
(427, 640, 3)


from matplotlib import image

#Load an image from local storage (the path is relative to the working directory)
img = image.imread('imagen.jpg')
#Shape of the image
print(img.shape)

#Display the image
plt.imshow(img)
plt.show()

(1600, 900, 3)


#Height and width of one image; H and W are reused later to flatten the whole dataset
H, W = x[0].shape

print('Tamaño original: {}x{}'.format(H,W))
#Flattening: reshape the HxW matrix into a single vector with H*W entries
print('Flattening: {} con tamaño {}'.format(x[0].reshape(H*W), x[0].reshape(H*W).shape))

Tamaño original: 8x8
Flattening: [ 0.  0.  5. 13.  9.  1.  0.  0.  0.  0. 13. 15. 10. 15.  5.  0.  0.  3.
 15.  2.  0. 11.  8.  0.  0.  4. 12.  0.  0.  8.  8.  0.  0.  5.  8.  0.
  0.  9.  8.  0.  0.  4. 11.  0.  1. 12.  7.  0.  0.  2. 14.  5. 10. 12.
  0.  0.  0.  0.  6. 13. 10.  0.  0.  0.] con tamaño (64,)


from sklearn.linear_model import Perceptron
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

#Flatten the dataset: one row of H*W pixel values per image
x_flat = x.reshape(len(x), H*W)
print('Tamaño con flattening: {}'.format(x_flat.shape))
#Split into training and evaluation sets (30% held out for evaluation).
#The fixed random_state makes the shuffle, and therefore the report printed
#below, reproducible from run to run
x_train, x_test, y_train, y_test = train_test_split(x_flat, y, test_size=0.3, random_state=0)

#Train a perceptron model
model = Perceptron(eta0=0.01)
model.fit(x_train, y_train)

#Evaluate the model on the held-out set
y_pred = model.predict(x_test)
report = classification_report(y_test, y_pred)

print(report)

Tamaño con flattening: (1797, 64)
              precision    recall  f1-score   support

           0       1.00      1.00      1.00        50
           1       0.93      0.93      0.93        46
           2       0.98      0.98      0.98        49
           3       0.96      0.92      0.94        59
           4       1.00      0.97      0.98        58
           5       0.96      0.88      0.92        57
           6       0.96      1.00      0.98        53
           7       0.98      0.91      0.94        54
           8       0.90      0.85      0.88        55
           9       0.78      0.98      0.87        59

    accuracy                           0.94       540
   macro avg       0.95      0.94      0.94       540
weighted avg       0.95      0.94      0.94       540


from collections import defaultdict, Counter
import torch
import torch.nn as nn

#Build a vocabulary that maps every word to a numeric index
def vocab():
    """Return an empty, auto-indexing vocabulary.

    The result is a ``defaultdict`` whose factory returns the number of
    entries stored at the moment of the lookup, so each unseen key is
    assigned the next consecutive integer the first time it is accessed.
    """
    # The lambda reads `word_ids` only when it is called, i.e. after the
    # assignment below has completed, so the self-reference is safe.
    word_ids = defaultdict(lambda: len(word_ids))
    return word_ids

#Turn each string of symbols into a sequence of numeric indices
def text2index(corpus, vocab):
    """Lazily yield one index tensor per document of ``corpus``.

    Each document is split on whitespace, the marker ``'EOS'`` is appended,
    and every token is looked up in ``vocab`` with ``vocab[token]`` (so an
    auto-indexing vocabulary grows as new words are seen). The indices are
    returned as a 1-D ``torch.long`` tensor.
    """
    for doc in corpus:
        tokens = doc.split()
        tokens.append('EOS')
        ids = [vocab[tok] for tok in tokens]
        yield torch.tensor(ids, dtype=torch.long)


#Texts
text_data = ['el perro come un hueso', 'un muchacho jugaba', 'el muchacho saltaba la cuerda',
             'un perro come croquetas', 'el perro come', 'el gato come croquetas', 'un gato come']

#Create the vocabulary
voc = vocab()
#Give the end-of-sequence marker index 0 before any word is indexed
voc['EOS'] = 0
#Replace the text with indices.
#Note: this rebinds `data`, which previously held the result of load_digits()
data = list(text2index(text_data, voc))

print(voc)
print(data[0])

defaultdict(<function vocab.<locals>.<lambda> at 0x7f4fd8132af0>, {'EOS': 0, 'el': 1, 'perro': 2, 'come': 3, 'un': 4, 'hueso': 5, 'muchacho': 6, 'jugaba': 7, 'saltaba': 8, 'la': 9, 'cuerda': 10, 'croquetas': 11, 'gato': 12})
tensor([1, 2, 3, 4, 5, 0])


#Size of the vocabulary
n = len(voc)
#Embedding layer of dimension 2: one 2-d vector per vocabulary index
emb = nn.Embedding(n, 2)

#Embedding vectors of the first document, one row per token
print(emb(data[0]))

tensor([[ 0.7414,  1.2393],
        [-0.3499,  0.3628],
        [-1.9550,  0.8088],
        [-1.4627,  0.8945],
        [ 0.0432, -0.1109],
        [-0.6568,  0.1318]], grad_fn=<EmbeddingBackward>)


#Scatter-plot 2-d points and annotate each one with its label
def plot_words(Z,ids):
    """Draw the rows of ``Z`` as blue points, label them and show the figure.

    Z: array indexed as ``Z[:,0]`` and ``Z[:,1]``; those two columns are used
       as the horizontal and vertical coordinates.
    ids: iterable of labels paired with the rows of ``Z`` in order (``zip``
         stops at the shorter of the two).
    """
    xs, ys = Z[:,0], Z[:,1]
    #Plot with the given marker and colour (c)
    plt.scatter(xs, ys, marker='o', c='blue')
    for label, px, py in zip(ids, xs, ys):
        #Place the text just above the point, horizontally centred on it
        plt.annotate(label, xy=(px,py), xytext=(-1,1), textcoords='offset points', ha='center', va='bottom')
    plt.show()
    
#Plot the embedding vectors before training, labelled with the vocabulary words
plot_words(emb.weight.detach().numpy(), voc.keys())


from tqdm import tqdm

#Network architecture: a linear layer over the 2-d embedding followed by a softmax
#NOTE(review): the output layer has len(voc)+1 units although the targets are
#vocabulary indices below len(voc) -- confirm the extra unit is intended
out = nn.Sequential(nn.Linear(2, len(voc)+1), nn.Softmax(1))
risk = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(list(emb.parameters()) + list(out.parameters()), lr=0.01)
#Training: every token of a sequence is used to predict the token that follows it
for t in tqdm(range(0, 100)):
    #The loop variable is `seq`, not `x`, so the digit images bound to `x`
    #are not overwritten by a token tensor
    for seq in data:
        optimizer.zero_grad()
        #CrossEntropyLoss applies log-softmax internally, so it is given the raw
        #scores of the linear layer (out[0]) and not the softmax output of `out`;
        #passing probabilities would apply softmax twice and flatten the gradients
        logits = out[0](emb(seq[:-1]))
        loss = risk(logits, seq[1:])
        loss.backward()
        optimizer.step()

100%|██████████| 100/100 [00:00<00:00, 281.04it/s]


#Plot the embedding vectors again, now after the training loop has updated them
plot_words(emb.weight.detach().numpy(), voc.keys())


import networkx as nx

#Build a weighted graph
G = nx.Graph()
G.add_edge(1, 2, weight=15)
G.add_edge(1, 3, weight=10)
G.add_edge(3, 4, weight=18)
G.add_edge(2, 4, weight=20)
G.add_edge(2, 3, weight=12)

#Draw the graph
#The same node positions are passed to both drawing calls
pos = nx.spring_layout(G)
#Use the "weight" attribute of each edge as its label
edge_labels = nx.get_edge_attributes(G, "weight")
nx.draw_networkx_edge_labels(G, pos, edge_labels)
nx.draw(G, pos, with_labels=True)


#Adjacency matrix of the graph as a NumPy array
A = nx.to_numpy_array(G)

print(A)

[[ 0. 15. 10.  0.]
 [15.  0. 12. 20.]
 [10. 12.  0. 18.]
 [ 0. 20. 18.  0.]]

Datos no estructurados

Imágenes

Flattening

Datos textuales

Grafos relacionales