"""Train a LightGBM binary classifier on train.csv and predict test.csv.

Reads ``train.csv`` (features plus the ``Transported`` target) and
``test.csv``, integer-encodes every feature column, fits an
``LGBMClassifier`` on 80% of the training rows, reports accuracy on the
remaining 20%, and writes ``submission.csv`` with the columns
``PassengerId`` and ``Transported``.
"""
import pandas as pd
import lightgbm as lgb
from sklearn.model_selection import train_test_split

# Fixed seed so the train/hold-out split (and therefore the submission)
# is reproducible from run to run.
SPLIT_SEED = 42


def _encode_with_shared_categories(train_features, test_features, columns):
    """Integer-encode *columns* using one category mapping for both frames.

    Encoding each frame on its own (``pd.Categorical(frame[c]).codes``)
    derives the categories from that frame only, so the same raw value can
    receive different codes in train and test.  Here each column is encoded
    once over the concatenated train+test values and the codes are then
    split back, which guarantees an identical value -> code mapping.

    Args:
        train_features: DataFrame holding the training feature columns.
        test_features: DataFrame holding (at least) the same columns.
        columns: List of column names to encode.

    Returns:
        A ``(train_encoded, test_encoded)`` pair of new DataFrames that
        contain exactly *columns*, in that order.  Missing values are
        encoded as ``-1`` (the code pandas assigns to NaN).

    Raises:
        KeyError: If either frame lacks one of *columns*.
    """
    n_train = len(train_features)
    train_encoded = train_features[columns].copy()
    test_encoded = test_features[columns].copy()
    for col in columns:
        combined = pd.concat(
            [train_features[col], test_features[col]], ignore_index=True
        )
        codes = pd.Categorical(combined).codes
        train_encoded[col] = codes[:n_train]
        test_encoded[col] = codes[n_train:]
    return train_encoded, test_encoded


df_train = pd.read_csv('train.csv')
df_test = pd.read_csv('test.csv')

data = df_train.drop(columns=['Transported'])
target = df_train['Transported']
cols = data.columns.to_list()

# Restrict the test frame to the training feature columns (same order) so
# the model sees the same layout at fit and predict time.
data, test_features = _encode_with_shared_categories(data, df_test, cols)

x_train, x_val, y_train, y_val = train_test_split(
    data, target, test_size=0.2, shuffle=True, random_state=SPLIT_SEED
)

params = {
    'objective': 'binary',
    'boosting_type': 'gbdt',
    'num_leaves': 31,
    'learning_rate': 0.05,
    'feature_fraction': 0.9,
}
clf = lgb.LGBMClassifier(**params)
clf.fit(x_train, y_train)

# Use the hold-out predictions instead of discarding them.
val_pred = clf.predict(x_val)
accuracy = (val_pred == y_val.to_numpy()).mean()
print(f"hold-out accuracy: {accuracy:.4f}")

pred = clf.predict(test_features)

# df_test was not modified by the encoding step, so its PassengerId column
# still holds the raw identifiers.
PassengerId = df_test['PassengerId']
submission = pd.DataFrame({'PassengerId': PassengerId, 'Transported': pred})
submission.to_csv("submission.csv", index=False)