SIGNATE Titanic survival prediction — PyTorch neural-network program
import time
import numpy as np
import pandas as pd
import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader,TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
# --- Data loading & preprocessing ---
# Read the SIGNATE Titanic train/test TSVs (cp932/Shift-JIS encoded, tab-separated).
train_raw = pd.read_csv('I:/SIGNATE/titanic/train.tsv', delimiter='\t', encoding='cp932')
test_raw = pd.read_csv('I:/SIGNATE/titanic/test.tsv', delimiter='\t', encoding='cp932')

# Drop the id column from the features; keep the test ids for the submission file.
train_raw = train_raw.drop('id', axis=1)
columns_list_raw = train_raw.columns
id_list_raw = test_raw.id.values
test_raw = test_raw.drop('id', axis=1)

# Drop 'age' and 'embarked' instead of imputing their missing values.
train = train_raw.drop(['age', 'embarked'], axis=1)
test = test_raw.drop(['age', 'embarked'], axis=1)

# Encode 'sex' as an integer via Series.map.
# (The original repeated this mapping block twice verbatim; deduplicated.)
sex_mapping = {'male': 0, 'female': 1}
train['sex'] = train_raw['sex'].map(sex_mapping)
test['sex'] = test_raw['sex'].map(sex_mapping)

# Split features/target as numpy arrays and hold out 20% for validation.
# (train_test_split is already imported at the top of the file.)
X = train.drop('survived', axis=1).values
y = train.survived.values
train_X, val_X, train_y, val_y = train_test_split(X, y, test_size=0.2, random_state=0)
# --- Convert numpy arrays to tensors and wrap them in DataLoaders ---
train_X = torch.tensor(train_X, dtype=torch.float32)
train_y = torch.tensor(train_y, dtype=torch.int64)
val_X = torch.tensor(val_X, dtype=torch.float32)
val_y = torch.tensor(val_y, dtype=torch.int64)

# TensorDataset / DataLoader are imported directly at the top of the file.
train_set = TensorDataset(train_X, train_y)
test_set = TensorDataset(val_X, val_y)

batch_sizes = 32
# Fix: shuffle the training batches each epoch (the original used shuffle=False,
# which feeds batches in the same order every epoch). Validation stays ordered.
train_loader = DataLoader(train_set, batch_size=batch_sizes, shuffle=True)
test_loader = DataLoader(test_set, batch_size=batch_sizes, shuffle=False)
# --- Network definition ---
class net(nn.Module):
    """Simple MLP classifier: 5 input features -> 2 output logits.

    Fix: the original stacked Linear layers with no nonlinearity, which
    collapses the whole stack into a single affine map; ReLU activations
    are inserted between the layers.
    """

    def __init__(self):
        super(net, self).__init__()
        # 'dence' typo in the original attribute renamed to 'dense'
        # (internal only; nothing outside the class touches it).
        self.dense = nn.Sequential(
            nn.Linear(5, 50),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(50, 50),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(50, 2))

    def forward(self, x):
        # Returns raw logits; nn.CrossEntropyLoss applies log-softmax itself.
        return self.dense(x)
# --- Training setup ---
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# NOTE: rebinds the class name to the instance; kept because the rest of
# the script refers to the model as `net`.
net = net().to(device)

# Loss: cross-entropy over the 2 output logits.
criterion = nn.CrossEntropyLoss()
# Optimizer: Adam with default hyperparameters.
optimizer = optim.Adam(net.parameters())

num_epochs = 500

# Per-epoch metric histories (the original also created train_losses /
# test_losses / train_accuracy / test_accuracy, which were never used,
# and re-imported `time`; both removed).
train_loss_list = []
train_acc_list = []
val_loss_list = []
val_acc_list = []

start = time.time()
for epoch in range(num_epochs):
    train_loss = 0
    train_acc = 0
    val_loss = 0
    val_acc = 0

    # --- training phase ---
    net.train()
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)
        optimizer.zero_grad()
        outputs = net(X)  # call the module, not .forward(), so hooks run
        loss = criterion(outputs, y)
        train_loss += loss.item()
        # predicted class = argmax over the 2 logits
        train_acc += (outputs.max(1)[1] == y).sum().item()
        loss.backward()
        optimizer.step()
    # NOTE(review): this divides the sum of per-batch *mean* losses by the
    # dataset size, so it is not a true per-sample average — kept as-is to
    # preserve the reported numbers.
    avg_train_loss = train_loss / len(train_loader.dataset)
    avg_train_acc = train_acc / len(train_loader.dataset)

    # --- validation phase (no gradients) ---
    net.eval()
    with torch.no_grad():
        for X, y in test_loader:
            X = X.to(device)
            y = y.to(device)
            outputs = net(X)
            loss = criterion(outputs, y)
            val_loss += loss.item()
            val_acc += (outputs.max(1)[1] == y).sum().item()
    avg_val_loss = val_loss / len(test_loader.dataset)
    avg_val_acc = val_acc / len(test_loader.dataset)

    if epoch % 100 == 0:
        # Fix: the original passed a stray third positional argument (the
        # batch index) that the format string never consumed.
        print('Epoch [{}/{}], Loss: {loss:.4f}, val_loss: {val_loss:.4f}, val_acc: {val_acc:.4f}'
              .format(epoch + 1, num_epochs, loss=avg_train_loss,
                      val_loss=avg_val_loss, val_acc=avg_val_acc))

    train_loss_list.append(avg_train_loss)
    train_acc_list.append(avg_train_acc)
    val_loss_list.append(avg_val_loss)
    val_acc_list.append(avg_val_acc)

end = time.time() - start
print(end)
# --- Plot learning curves ---
# (Removed the duplicate `import matplotlib.pyplot`, the `%matplotlib inline`
# notebook magic — invalid in a plain .py script — and a duplicate print(end);
# pyplot is already imported at the top of the file.)
# Fix: these lists hold accuracies, so the labels now say "accuracy",
# not "loss" as in the original.
plt.plot(train_acc_list, label='Training accuracy')
plt.plot(val_acc_list, label='Validation accuracy')
plt.legend()
# --- Inference on the test set and submission file ---
test = torch.tensor(test.values, dtype=torch.float32)
# Iterate the raw tensor one row at a time; DataLoader accepts any indexable,
# so wrapping it in a TensorDataset (which the original built but never used)
# is unnecessary. Unused counter `c` also removed.
new_test_loader = torch.utils.data.DataLoader(test, batch_size=1, shuffle=False)

pred_list = []
start = time.time()
net.eval()
with torch.no_grad():
    for row in new_test_loader:  # `row` has shape (1, 5): one passenger's features
        row = row.to(device)
        output = net(row)
        _, pred = torch.max(output.data, 1)
        pred_list.append(pred.item())
print(time.time() - start)

pred_list = np.array(pred_list).flatten()
# Submission format: id<TAB>prediction, no header row.
# (Output filename kept byte-identical, including the 'nueral' typo,
# so any downstream tooling that expects it still works.)
submissions = pd.DataFrame({"id": id_list_raw,
                            "生存確率": pred_list})
submissions.to_csv("submit_nueral.tsv", sep='\t', index=False, header=False)
References:
"Saving and Loading Models" — PyTorch official tutorials