T2-1
# Original question (translated from the forum post):
#   Instructor, I wrote the code below for this problem and obtained the
#   results. With evaluation metrics and predictions at this level, would
#   there be any problem receiving 40 points? The picture at the very bottom
#   is the code, and the values above it are the results.
# Reported outputs: 0.839453284373725   [2]: 0.9876977152899824

# Exam environment setup (do not modify this code)
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split


def exam_data_load(df, target, id_name="", null_name=""):
    """Split ``df`` 80/20 into exam-style train/test frames.

    Args:
        df: Source DataFrame containing the features and the target column.
        target: Name of the target column.
        id_name: Name of the id column. When empty, the index is reset and
            exposed as a new ``id`` column.
        null_name: When non-empty, every cell equal to this value is replaced
            with NaN before splitting.

    Returns:
        ``(X_train, X_test, y_train, y_test)``; the ``X_*`` frames have the
        target column removed and the ``y_*`` frames hold only the id and
        target columns.
    """
    if id_name == "":
        df = df.reset_index().rename(columns={"index": "id"})
        id_name = 'id'
    else:
        id_name = id_name

    if null_name != "":
        df[df == null_name] = np.nan

    X_train, X_test = train_test_split(df, test_size=0.2, random_state=2021)

    y_train = X_train[[id_name, target]]
    X_train = X_train.drop(columns=[target])

    y_test = X_test[[id_name, target]]
    X_test = X_test.drop(columns=[target])
    return X_train, X_test, y_train, y_test


df = pd.read_csv("../input/titanic/train.csv")
X_train, X_test, y_train, y_test = exam_data_load(df, target='Survived', id_name='PassengerId')

X_train.shape, X_test.shape, y_train.shape, y_test.shape

# ---------------------------------------------------------------------------
# Solution
# ---------------------------------------------------------------------------
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Drop the free-text / high-cardinality columns in one call.
cols = ['Name', 'Cabin', 'Ticket']
X_train = X_train.drop(columns=cols)
X_test = X_test.drop(columns=cols)

# Impute with statistics learned from the training data only, so train and
# test rows receive the same fill value (the original filled X_test with the
# test set's own median).
age_median = X_train['Age'].median()
X_train['Age'] = X_train['Age'].fillna(age_median)
X_test['Age'] = X_test['Age'].fillna(age_median)
X_train['Embarked'] = X_train['Embarked'].fillna('S')
X_test['Embarked'] = X_test['Embarked'].fillna('S')

# Encode the categorical columns; the encoder is fitted on train and reused
# on test so both share the same label -> integer mapping.
cols = ['Sex', 'Embarked']
for col in cols:
    le = LabelEncoder()
    X_train[col] = le.fit_transform(X_train[col])
    X_test[col] = le.transform(X_test[col])

# Training features and label side by side; not used further in this script.
df = pd.concat([X_train, y_train['Survived']], axis=1)

# The id is not a feature: remove it from train, keep it aside for the
# submission file on test.
X_train = X_train.drop('PassengerId', axis=1)
X_test_id = X_test.pop('PassengerId')

X_tr, X_val, y_tr, y_val = train_test_split(
    X_train, y_train['Survived'], test_size=0.2, random_state=2023
)

# Fixed seed so the validation score and the submission are reproducible.
model = RandomForestClassifier(random_state=2023)
model.fit(X_tr, y_tr)

# ROC AUC must be computed from a continuous score, not from hard 0/1
# predictions. Column 1 of predict_proba is the probability of the second
# entry of model.classes_ (label 1, assuming Survived is coded 0/1).
val_proba = model.predict_proba(X_val)[:, 1]
print(roc_auc_score(y_val, val_proba))

# The submission keeps the original format: predicted class labels.
pred = model.predict(X_test)
submit = pd.DataFrame({'PassengerId': X_test_id, 'Survived': pred})
submit.to_csv('0000.csv', index=False)
pd.read_csv('0000.csv')

# NOTE: this is accuracy on the rows the model was trained on, so it is
# optimistic and says nothing about performance on unseen data.
model.score(X_tr, y_tr)