해결된 질문
작성
·
220
0
# Travel-insurance classification script: label-encodes the object-dtype
# columns, trains a random forest on a 90% split of train.csv, and writes the
# predicted probability of the second class for every row of test.csv.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

train = pd.read_csv("train.csv")
test = pd.read_csv("test.csv")

# Replace every object-dtype column of train (and the same column of test)
# with integer codes.
cols = train.select_dtypes(include='object').columns
for col in cols:
    le = LabelEncoder()
    # Fit on the values of both frames so that a category appearing only in
    # test.csv does not make transform() raise ValueError. When test holds no
    # new categories, the learned classes equal those of a train-only fit.
    le.fit(pd.concat([train[col], test[col]]))
    train[col] = le.transform(train[col])
    test[col] = le.transform(test[col])

# Hold out 10% of the training rows; the fixed seed keeps the split repeatable.
X_tr, X_val, y_tr, y_val = train_test_split(
    train.drop("TravelInsurance", axis=1),
    train["TravelInsurance"],
    test_size=0.1,
    random_state=2022,
)

rf = RandomForestClassifier(random_state=2022, max_depth=8, n_estimators=600)
rf.fit(X_tr, y_tr)

# Use the hold-out split (previously created but never read) as a sanity
# check of the fitted model before writing the submission.
print("validation accuracy:", rf.score(X_val, y_val))

# Select the test columns by the training feature names so the prediction does
# not depend on the column order of test.csv.
y_pred = rf.predict_proba(test[X_tr.columns])

# Column 1 of predict_proba is the probability of rf.classes_[1].
# NOTE(review): assumes a binary target whose second class is the positive
# one -- confirm against the data.
submit = pd.DataFrame({"index": test.index, "y_pred": y_pred[:, 1]})
submit.to_csv("990906.csv", index=False)