1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
import torch
from torch import nn

# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Mean-squared-error criterion, placed on the selected device.
loss = nn.MSELoss().to(device)

# Input width of the network: the size of dimension 1 of train_features.
# NOTE(review): train_features is defined outside this section -- presumably a
# 2-D (samples, features) tensor; confirm against the preprocessing code.
in_features = train_features.shape[1]
def get_net():
    """Build a new, freshly initialised regression network on ``device``.

    The architecture is ``Linear(in_features, 256) -> ReLU -> Linear(256, 1)``,
    where ``in_features`` and ``device`` are the module-level globals.

    Returns:
        The ``nn.Sequential`` model, already moved to ``device``.
    """
    layers = [
        nn.Linear(in_features, 256),
        nn.ReLU(),
        nn.Linear(256, 1),
    ]
    return nn.Sequential(*layers).to(device)
def log_rmse(net, features, labels):
    """Return the root-mean-squared error between log-predictions and log-labels.

    Predictions are clamped to ``[1, inf)`` before taking the logarithm, so
    the log of a prediction is never negative or undefined.  ``labels`` are
    passed to ``torch.log`` unclamped.

    Args:
        net: Callable model mapping ``features`` to predictions.
        features: Input tensor fed to ``net``.
        labels: Target tensor compared against the predictions.

    Returns:
        The metric as a Python float, computed with the module-level ``loss``
        criterion.
    """
    # Pure evaluation: only a scalar leaves this function, so there is no
    # reason to record an autograd graph over the whole dataset.
    with torch.no_grad():
        y_hat = torch.clamp(net(features), 1, float('inf'))
        out = torch.sqrt(loss(torch.log(y_hat), torch.log(labels)))
    return out.item()
from d2l import torch as d2l


def train(net, train_features, train_labels, test_features, test_labels,
          num_epochs, learning_rate, weight_decay, batch_size):
    """Train ``net`` with Adam and record the log-RMSE after every epoch.

    Minibatches come from ``d2l.load_array`` and are optimised against the
    module-level ``loss`` criterion.  After each epoch the full training set
    (and the test set, when given) is scored with ``log_rmse``.

    Args:
        net: Model to optimise in place.
        train_features: Training inputs.
        train_labels: Training targets.
        test_features: Held-out inputs; only used when ``test_labels`` is not
            ``None``.
        test_labels: Held-out targets, or ``None`` to skip held-out scoring.
        num_epochs: Number of passes over the training data.
        learning_rate: Adam learning rate.
        weight_decay: Adam weight-decay coefficient.
        batch_size: Minibatch size handed to ``d2l.load_array``.

    Returns:
        ``(train_ls, test_ls)``: lists with one log-RMSE value per epoch.
        ``test_ls`` stays empty when ``test_labels`` is ``None``.
    """
    train_ls, test_ls = [], []
    train_iter = d2l.load_array((train_features, train_labels), batch_size)
    optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate,
                                 weight_decay=weight_decay)

    # The full-dataset tensors used for per-epoch scoring do not change
    # between epochs, so move them to the device once instead of every epoch.
    # This happens after the iterator is built, so the iterator still wraps
    # the tensors exactly as the caller passed them.
    train_features = train_features.to(device)
    train_labels = train_labels.to(device)
    has_test = test_labels is not None
    if has_test:
        test_features = test_features.to(device)
        test_labels = test_labels.to(device)

    for _ in range(num_epochs):
        for X, y in train_iter:
            X = X.to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        train_ls.append(log_rmse(net, train_features, train_labels))
        if has_test:
            test_ls.append(log_rmse(net, test_features, test_labels))
    return train_ls, test_ls
def get_k_fold_data(k, i, X, y):
    """Split ``X`` and ``y`` into training and validation parts for one fold.

    The data is cut along dimension 0 into ``k`` consecutive chunks of
    ``X.shape[0] // k`` rows each; any remainder rows at the end are not used.
    Chunk ``i`` becomes the validation set and the remaining chunks, in their
    original order, are concatenated into the training set.

    Args:
        k: Number of folds.
        i: Zero-based index of the validation fold; must satisfy
            ``0 <= i < k``.
        X: Feature tensor, sliced along dimension 0.
        y: Label tensor, sliced along dimension 0 with the same indices.

    Returns:
        ``(X_train, y_train, X_valid, y_valid)``.  With ``k == 1`` there are
        no training chunks and ``X_train``/``y_train`` are ``None``.

    Raises:
        ValueError: If ``i`` is not a valid fold index for ``k``.
    """
    # Without this guard no chunk would be chosen for validation and the
    # function would fail later with an unrelated-looking UnboundLocalError.
    if not 0 <= i < k:
        raise ValueError(
            f'fold index i={i} must satisfy 0 <= i < k (k={k})')

    size = X.shape[0] // k
    X_parts, y_parts = [], []
    X_valid, y_valid = None, None
    for j in range(k):
        idx = slice(j * size, (j + 1) * size)
        if j == i:
            X_valid, y_valid = X[idx], y[idx]
        else:
            X_parts.append(X[idx])
            y_parts.append(y[idx])

    # Concatenate once at the end rather than growing the tensor inside the
    # loop, which would re-copy the accumulated rows on every iteration.
    X_train = torch.cat(X_parts, 0) if X_parts else None
    y_train = torch.cat(y_parts, 0) if y_parts else None
    return X_train, y_train, X_valid, y_valid
def k_fold(k, X_train, y_train, num_epochs, learning_rate, weight_decay,
           batch_size):
    """Run k-fold cross-validation and return the average final log-RMSE.

    For every fold a fresh network from ``get_net`` is trained with ``train``
    on the split produced by ``get_k_fold_data``; the last-epoch training and
    validation log-RMSE are printed and accumulated.

    Args:
        k: Number of folds.
        X_train: Full feature tensor to split.
        y_train: Full label tensor to split.
        num_epochs: Epochs per fold.
        learning_rate: Learning rate forwarded to ``train``.
        weight_decay: Weight decay forwarded to ``train``.
        batch_size: Minibatch size forwarded to ``train``.

    Returns:
        ``(mean_train_log_rmse, mean_valid_log_rmse)`` averaged over the
        ``k`` folds.
    """
    train_sum, valid_sum = 0, 0
    for i in range(k):
        data = get_k_fold_data(k, i, X_train, y_train)
        net = get_net()
        train_ls, valid_ls = train(net, *data, num_epochs, learning_rate,
                                   weight_decay, batch_size)
        train_sum += train_ls[-1]
        valid_sum += valid_ls[-1]
        print(f'fold {i+1},train log rmse{float(train_ls[-1]):f},'
              f'valid log rmse {float(valid_ls[-1]):f}')
    # The original returned the misspelled name `trian_sum`, which raised
    # NameError only after every fold had finished training.
    return train_sum / k, valid_sum / k
# Hyper-parameters shared by the cross-validation run and the final training.
k, num_epochs, lr, weight_decay, batch_size = 5, 100, 0.1, 0.1, 128

# Cross-validate and report the fold-averaged training/validation log-RMSE.
train_l, valid_l = k_fold(k, train_features, train_labels, num_epochs, lr,
                          weight_decay, batch_size)
# The summary previously formatted the misspelled name `trian_l` and raised
# NameError; it now reports the value returned by k_fold.
print(f'{k}折验证, 平均训练log rmse:{float(train_l):f},'
      f'平均验证log rmse{float(valid_l):f}')
def train_and_pred(train_features, test_features, train_labels, test_data,
                   num_epochs, lr, weight_decay, batch_size):
    """Train on the full training set and write predictions to a CSV file.

    A fresh network from ``get_net`` is trained with ``train`` (no held-out
    set), the final training log-RMSE is printed, and the predictions for
    ``test_features`` are stored in ``test_data['Sold Price']``.  The ``Id``
    and ``Sold Price`` columns are then written to ``submission.csv``.

    Args:
        train_features: Training inputs.
        test_features: Inputs to predict on.
        train_labels: Training targets.
        test_data: DataFrame with an ``Id`` column; modified in place by
            adding/overwriting the ``Sold Price`` column.
            NOTE(review): presumably has one row per row of
            ``test_features`` -- confirm against the loading code.
        num_epochs: Number of training epochs.
        lr: Learning rate forwarded to ``train``.
        weight_decay: Weight decay forwarded to ``train``.
        batch_size: Minibatch size forwarded to ``train``.
    """
    net = get_net()
    train_ls, _ = train(net, train_features, train_labels, None, None,
                        num_epochs, lr, weight_decay, batch_size)
    print(f'train log rmse {float(train_ls[-1]):f}')

    # Inference only: skip autograd bookkeeping for the forward pass.
    with torch.no_grad():
        preds = net(test_features.to(device)).cpu().numpy()

    # Assign the raw array so rows are matched by position.  Wrapping it in a
    # pd.Series would align on index labels and silently produce NaN whenever
    # test_data does not carry a default 0..n-1 index.
    test_data['Sold Price'] = preds[:, 0]
    submission = pd.concat([test_data['Id'], test_data['Sold Price']], axis=1)
    submission.to_csv('submission.csv', index=False)


# Final run: train on all training data and emit the submission file.
train_and_pred(train_features, test_features, train_labels, test_df,
               num_epochs, lr, weight_decay, batch_size)
|