해결된 질문
작성
·
89
0
안녕하세요 선생님..
이 문제에서
# Train a random-forest classifier on the customer data to predict the
# '성별' column, then produce class probabilities for the test set.
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Show every column and print floats in fixed-point notation.
# ('display.max_columns' is the full option name; the shortened
# 'display.max_column' only worked through pandas' partial-key matching.)
pd.set_option('display.max_columns', None)
pd.set_option('display.float_format', "{:.10f}".format)

train = pd.read_csv("data/customer_train.csv")
test = pd.read_csv("data/customer_test.csv")
# Exploration notes from the original author:
#   - train.shape == (3500, 11), test.shape == (2482, 10)
#   - the refund-amount column has 2295 missing values in train

# Replace missing values with 0 in BOTH frames.
train = train.fillna(0)
# FIX: this line used to read `test = train.fillna(0)`, which silently
# replaced the test set with a copy of the training set. That copy still
# contained the target column, so after `train.pop('성별')` below the model
# was fitted on 10 features while `test` had 11, producing
# "ValueError: X has 11 features, but DecisionTreeClassifier is expecting
# 10 features as input." at predict time.
test = test.fillna(0)

# The two object-typed columns (42 and 24 unique values in train, per the
# author's notes) are label-encoded rather than one-hot encoded.
# Equivalent dynamic selection:
#   cols = train.select_dtypes(include='object').columns
cols = ['주구매상품', '주구매지점']

for col in cols:
    # Fit on train only and reuse the same mapping for test.
    # NOTE(review): `transform` raises if test contains a category that
    # never appears in train -- confirm the category sets match.
    le = LabelEncoder()
    train[col] = le.fit_transform(train[col])
    test[col] = le.transform(test[col])

# Separate the target from the features; `train` now holds features only.
target = train.pop('성별')

# Hold out 20% of the training rows for validation (2800 rows remain for
# fitting, per the author's notes).
X_tr, X_val, y_tr, y_val = train_test_split(train, target, test_size=0.2)

rf = RandomForestClassifier()
rf.fit(X_tr, y_tr)
pred = rf.predict_proba(test)  # <---- the line the question is about
여기를 실행하면
ValueError: X has 11 features, but DecisionTreeClassifier is expecting 10 features as input.
가 발생합니다.. 대체 왜 그럴까요 ㅠㅠ??
헉..감사합니다