https://dataq.goorm.io/exam/116674/체험하기/quiz/4
여기서 하는 성별예측 문제에서
import pandas as pd
train = pd.read_csv("data/customer_train.csv")
test = pd.read_csv("data/customer_test.csv")
# print(train.shape, test.shape)
# print(train.info())
# print(train.isnull().sum())
# print(test.isnull().sum())
#결측치 처리 (환불금액)
train['환불금액'] = train['환불금액'].fillna(0)
test['환불금액'] = test['환불금액'].fillna(0)
# print(test.isnull().sum())
# print(train['성별'].value_counts())
#인코딩
target = train.pop('성별')
train = pd.get_dummies(train)
test = pd.get_dummies(test)
#검증 데이터 분리
from sklearn.model_selection import train_test_split
X_tr, X_val, y_tr, y_val = train_test_split(train, target, test_size=0.2, random_state=0)
# print(X_tr.shape, X_val.shape, y_tr.shape, y_val.shape)
from sklearn.ensemble import RandomForestClassifier
rf = RandomForestClassifier(random_state=0)
rf.fit(X_tr, y_tr)
pred = rf.predict_proba(X_val)
# print(pred)
from sklearn.metrics import roc_auc_score
roc = roc_auc_score(y_val, pred[:,1])
#예측 및 파일생성
pred = rf.predict_proba(test)
submit = pd.DataFrame({
"pred":pred[:,1]
})
submit.to_csv("result.csv", index=False)
# print(pd.read_csv("result.csv"))
이렇게 코딩을 했는데 예측 및 파일 생성에서
pred = rf.predict_proba(test) 여기 부분이 오류가 뜨는데 왜 그런가요 ㅠㅠ
> Makefile:6: recipe for target 'py3_run' failed
make: *** [py3_run] Error 1
Traceback (most recent call last):
File "/goorm/Main.out", line 43, in <module>
pred = rf.predict_proba(test)
File "/usr/local/lib/python3.9/dist-packages/sklearn/ensemble/_forest.py", line 674, in predict_proba
X = self._validate_X_predict(X)
File "/usr/local/lib/python3.9/dist-packages/sklearn/ensemble/_forest.py", line 422, in validateX_predict
return self.estimators_[0]._validate_X_predict(X, check_input=True)
File "/usr/local/lib/python3.9/dist-packages/sklearn/tree/_classes.py", line 407, in validateX_predict
X = self._validate_data(X, dtype=DTYPE, accept_sparse="csr",
File "/usr/local/lib/python3.9/dist-packages/sklearn/base.py", line 437, in validatedata
self._check_n_features(X, reset=reset)
File "/usr/local/lib/python3.9/dist-packages/sklearn/base.py", line 365, in checkn_features
raise ValueError(
ValueError: X has 73 features, but DecisionTreeClassifier is expecting 74 features as input.
이렇게 뜹니다,,