[SIGNATE]TITANIC-PyTorch×ニューラルネット

SIGNATE

プログラム

import time
import numpy as np
import pandas as pd
 
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader,TensorDataset

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
 
%matplotlib inline


import warnings
warnings.filterwarnings('ignore')

# Load the SIGNATE Titanic train/test tables (tab-separated, cp932-encoded).
train_raw = pd.read_csv('I:/SIGNATE/titanic/train.tsv', sep='\t', encoding='cp932')
test_raw = pd.read_csv('I:/SIGNATE/titanic/test.tsv', sep='\t', encoding='cp932')

# The id column is not a feature: set the test ids aside (they are needed for
# the submission file) and remove the column from both tables.
id_list_raw = test_raw['id'].values
train_raw = train_raw.drop(columns='id')
test_raw = test_raw.drop(columns='id')
columns_list_raw = train_raw.columns

# Working copies without the age and embarked columns.
train = train_raw.drop(columns=['age', 'embarked'])
test = test_raw.drop(columns=['age', 'embarked'])


# Encode the sex column as an integer using pandas' Series.map
# (male -> 0, female -> 1). Values that are not keys of the mapping become NaN.
sex_mapping = {'male': 0, 'female': 1}
train['sex'] = train_raw['sex'].map(sex_mapping)
test['sex'] = test_raw['sex'].map(sex_mapping)

from sklearn.model_selection import train_test_split

# Separate the survived label from the remaining feature columns and convert
# both to NumPy arrays.
y = train['survived'].values
X = train.drop(columns='survived').values

# Hold out 20% of the rows for validation; the fixed random_state makes the
# split repeatable.
train_X, val_X, train_y, val_y = train_test_split(
    X, y, test_size=0.2, random_state=0
)
# Bare expression: has no effect when run as a plain script.
train_X.shape

# Convert the NumPy splits to tensors: float32 features, int64 class labels.
train_X = torch.tensor(train_X, dtype=torch.float32)
train_y = torch.tensor(train_y, dtype=torch.int64)

val_X = torch.tensor(val_X, dtype=torch.float32)
val_y = torch.tensor(val_y, dtype=torch.int64)

train_set = TensorDataset(train_X, train_y)
# NOTE: despite their names, test_set / test_loader hold the validation split.
test_set = TensorDataset(val_X, val_y)

batch_sizes = 32
# Reshuffle the training rows every epoch so the optimizer does not see the
# same mini-batches in the same order each time. The validation loader keeps
# a fixed order because it is only used for evaluation.
train_loader = DataLoader(train_set, batch_size=batch_sizes, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_sizes, shuffle=False)

# Network definition
class net(nn.Module):
    """Fully connected classifier: input -> hidden -> hidden -> class logits.

    With the default arguments the layer stack is exactly
    Linear(5, 50), Dropout(0.2), Linear(50, 50), Dropout(0.2), Linear(50, 2).

    Args:
        in_features: Number of input features per row.
        hidden_features: Width of both hidden layers.
        out_features: Size of the output logit vector (number of classes).
        dropout: Drop probability of the dropout layer after each hidden layer.
        activation: Optional zero-argument factory (e.g. ``nn.ReLU``) whose
            result is inserted after each hidden ``Linear`` layer. ``None``
            (the default) inserts nothing; note that without a non-linearity
            the stacked ``Linear`` layers compose to a single affine map at
            evaluation time.
    """

    def __init__(self, in_features=5, hidden_features=50, out_features=2,
                 dropout=0.2, activation=None):
        # Zero-argument super() does not look up the module-level name of this
        # class, so construction keeps working even if that name is rebound.
        super().__init__()
        layers = []
        width = in_features
        for _ in range(2):
            layers.append(nn.Linear(width, hidden_features))
            if activation is not None:
                layers.append(activation())
            layers.append(nn.Dropout(dropout))
            width = hidden_features
        layers.append(nn.Linear(width, out_features))
        # The attribute name (sic) is kept: it is part of the state_dict keys.
        self.dence = nn.Sequential(*layers)

    def forward(self, x):
        """Return the raw (un-normalised) class logits for the batch ``x``."""
        return self.dence(x)
    

# Run on the GPU when PyTorch reports one as available, otherwise on the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# NOTE: this rebinds the name `net` from the class to the model instance.
net = net()
net = net.to(device)

# Loss function
criterion = nn.CrossEntropyLoss()

# Optimizer (Adam with its default hyper-parameters)
optimizer = optim.Adam(net.parameters())

# History containers (not filled by the statements in this section).
train_losses, test_losses = [], []
train_accuracy, test_accuracy = [], []

    
num_epochs = 500

# Per-epoch history; one value is appended to each list after every epoch.
train_loss_list = []
train_acc_list = []
val_loss_list = []
val_acc_list = []
import time
start = time.time()
for epoch in range(num_epochs):
    train_loss = 0
    train_acc = 0
    val_loss = 0
    val_acc = 0

    # Training pass: dropout active, parameters updated after every batch.
    net.train()
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        optimizer.zero_grad()
        # Call the module itself rather than .forward() so hooks are honoured.
        outputs = net(inputs)
        loss = criterion(outputs, targets)
        # criterion (nn.CrossEntropyLoss with its default reduction) returns
        # the mean over the batch. Weight it by the batch size so that
        # dividing by the dataset size below yields a true per-sample mean.
        train_loss += loss.item() * targets.size(0)
        train_acc += (outputs.argmax(dim=1) == targets).sum().item()
        loss.backward()
        optimizer.step()

    avg_train_loss = train_loss / len(train_loader.dataset)
    avg_train_acc = train_acc / len(train_loader.dataset)

    # Validation pass: dropout disabled, no gradients tracked.
    net.eval()
    with torch.no_grad():
        for inputs, targets in test_loader:
            inputs = inputs.to(device)
            targets = targets.to(device)
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            val_loss += loss.item() * targets.size(0)
            val_acc += (outputs.argmax(dim=1) == targets).sum().item()
    avg_val_loss = val_loss / len(test_loader.dataset)
    avg_val_acc = val_acc / len(test_loader.dataset)

    # Progress report on epochs 1, 101, 201, ...
    if epoch % 100 == 0:
        print('Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}'
              .format(epoch+1, num_epochs, loss=avg_train_loss, val_loss=avg_val_loss, val_acc=avg_val_acc))
    train_loss_list.append(avg_train_loss)
    train_acc_list.append(avg_train_acc)
    val_loss_list.append(avg_val_loss)
    val_acc_list.append(avg_val_acc)

# Despite its name, `end` holds the elapsed training time in seconds.
end = time.time() - start
print(end)

import matplotlib.pyplot as plt
%matplotlib inline
print(end)
# Per-epoch accuracy curves. The plotted lists hold accuracies, so the legend
# labels them as accuracy rather than loss.
plt.plot(train_acc_list, label='Training accuracy')
plt.plot(val_acc_list, label='Validation accuracy')
plt.legend();

# Convert the prepared test table to a float32 tensor.
test = torch.tensor(test.values, dtype=torch.float32)

# Inference runs in eval mode (dropout disabled), so each row's prediction does
# not depend on which other rows share its batch; predicting in batches gives
# the same labels as one forward pass per row, with far fewer passes.
batch_sizes = 32
test_sets = torch.utils.data.TensorDataset(test)
new_test_loader = torch.utils.data.DataLoader(test_sets, batch_size = batch_sizes, shuffle = False)

pred_list = []
start = time.time()
net.eval()
with torch.no_grad():
    # A single-tensor TensorDataset yields one-element batches: unpack them.
    for (rows,) in new_test_loader:
        rows = rows.to(device)
        output = net(rows)
        # Predicted class = index of the largest logit in each row.
        pred = output.argmax(dim=1)
        pred_list.extend(pred.tolist())
print(time.time() - start)

# Turn the collected predictions into a 1-D array.
pred_list = np.array(pred_list).reshape(-1)

# Pair every test id with its predicted label and write the table as a
# tab-separated file without the index and without a header row.
submissions = pd.DataFrame(
    data={
        "id": id_list_raw,
        "生存確率": pred_list,
    }
)
submissions.to_csv(
    "submit_nueral.tsv",
    sep='\t',
    index=False,
    header=False,
)