코레일 크롤링 질문

코레일도 토요코인이랑 비슷하게 POST 요청을 이용해서 크롤링하려 해봤는데 잘 안 되더라고요.

결국에는 그냥 Selenium으로 접속한 뒤 BeautifulSoup으로 좌석이 있는지 확인하고, 10초마다 다시 조회해서 좌석이 있는지 재확인하는 식으로 어느 정도 성공시키긴 했는데, 코레일은 requests와 BeautifulSoup만으로는 어려울까요?


import requests

from bs4 import BeautifulSoup

import time

# Slack settings that the polling loop passes to post_message().
# NOTE(review): the token is empty as written (presumably redacted) — a real
# Slack token has to be supplied before notifications can work.
token = ""
# Channel that receives the seat alert.
channel = "#test"
# Generic message text; nothing in the visible script reads this name.
text = "Check your stock crawler."


def post_message(token, channel, text):
    """Send ``text`` to a Slack channel via the ``chat.postMessage`` Web API.

    Args:
        token: Slack token; sent as a ``Bearer`` Authorization header.
        channel: Destination channel, e.g. ``"#test"``.
        text: Message body to post.

    Returns:
        The ``requests.Response`` from Slack, so callers can inspect the
        outcome if they wish. Existing callers that ignore the return value
        are unaffected.

    Raises:
        requests.RequestException: On connection failure or timeout.
    """
    # A timeout keeps a stalled Slack connection from freezing the caller
    # forever; without it requests waits indefinitely.
    return requests.post(
        "https://slack.com/api/chat.postMessage",
        headers={"Authorization": "Bearer " + token},
        data={"channel": channel, "text": text},
        timeout=10,
    )

 

# Placeholder string ("API토큰" means "API token") for a Slack token.
# NOTE(review): nothing in the visible script reads `myToken`; the polling
# loop passes `token` to post_message() instead — confirm which one is meant.
myToken = "API토큰"

 

# Endpoint that the polling loop POSTs the search form (datas_obj) to.
url = "https://www.letskorail.com/ebizprd/EbizPrdTicketpr21100W_pr21110.do"



#date_list = ['2022/06/25', '2022/07/02']

# Form fields POSTed to `url` for the train search.
#
# Values written as `000` in the original are now the string "000": Python
# reads the literal 000 as the integer 0, so requests would have sent "0"
# instead of the three-digit code.
#
# NOTE(review): several other integers look like zero-padded codes that lost
# their leading zeros when typed as numbers (marked below). Compare each with
# the form data shown in the browser's network tab and quote them as strings
# if the site sends padded values.
datas_obj = {
    "selGoTrain": 5,
    "txtPsgFlg_1": 1,
    "txtPsgFlg_2": 0,
    "txtPsgFlg_8": 0,
    "txtPsgFlg_3": 0,
    "txtPsgFlg_4": 0,
    "txtPsgFlg_5": 0,
    "txtSeatAttCd_3": "000",
    "txtSeatAttCd_2": "000",
    "txtSeatAttCd_4": 15,  # NOTE(review): possibly "015" — confirm
    "selGoTrainRa": 5,
    "radJobId": 1,
    "adjcCheckYn": "Y",
    "txtGoStart": "용산",
    "txtGoEnd": "전주",
    "txtGoStartCode": 104,  # NOTE(review): possibly "0104" — confirm
    "txtGoEndCode": 45,  # NOTE(review): possibly "0045" — confirm
    "selGoYear": 2023,
    "selGoMonth": 3,  # NOTE(review): possibly "03" — confirm
    "selGoDay": 23,
    "selGoHour": 8,  # NOTE(review): possibly "08" — confirm
    "txtGoHour": 85500,  # NOTE(review): looks like HHMMSS, i.e. "085500" — confirm
    "txtGoYoil": "목",
    "selGoSeat1": 15,  # NOTE(review): possibly "015" — confirm
    "txtPsgCnt1": 1,
    "txtPsgCnt2": 0,
    "txtGoPage": 1,
    "txtGoAbrdDt": 20230323,
    "checkStnNm": "Y",
    "txtMenuId": 11,
    "SeandYo": "N",
    "ra": 1,
    "hidRsvTpCd": 3,  # NOTE(review): possibly zero-padded — confirm
    "txtPsgTpCd1": 1,
    "txtPsgTpCd2": 3,
    "txtPsgTpCd3": 1,
    "txtPsgTpCd5": 1,
    "txtPsgTpCd7": 1,
    "txtPsgTpCd8": 3,
    "txtDiscKndCd1": "000",
    "txtDiscKndCd2": "000",
    "txtDiscKndCd3": 111,
    "txtDiscKndCd5": 131,
    "txtDiscKndCd7": 112,
    "txtDiscKndCd8": 321,
    "txtCompaCnt1": 0,
    "txtCompaCnt2": 0,
    "txtCompaCnt3": 0,
    "txtCompaCnt4": 0,
    "txtCompaCnt5": 0,
    "txtCompaCnt6": 0,
    "txtCompaCnt7": 0,
    "txtCompaCnt8": 0,
}


# Reservation cell (6th column) of the 1st and 3rd rows of the result table.
# Inside such a cell, an image wrapped in a link (<a><img></a>) means a seat
# can be booked; a bare <img> directly under the cell means it is sold out.
_SEAT_CELLS = (
    "#tableResult > tbody > tr:nth-child(1) > td:nth-child(6)",
    "#tableResult > tbody > tr:nth-child(3) > td:nth-child(6)",
)
_POLL_INTERVAL_SEC = 2
_REQUEST_TIMEOUT_SEC = 10

# Remembers the previous poll's result so Slack is notified once when a seat
# appears, instead of every 2 seconds for as long as it stays available.
seat_was_available = False

while True:
    time.sleep(_POLL_INTERVAL_SEC)

    try:
        response = requests.post(url, data=datas_obj, timeout=_REQUEST_TIMEOUT_SEC)
    except requests.RequestException as exc:
        # A transient network failure should not end the monitor; retry on
        # the next tick.
        print(f"조회 실패: {exc}")
        continue

    soup = BeautifulSoup(response.text, 'html.parser')

    linked_imgs = [
        soup.select_one(f"{cell} > a:nth-child(1) > img") for cell in _SEAT_CELLS
    ]
    bare_imgs = [soup.select_one(f"{cell} > img") for cell in _SEAT_CELLS]

    if any(img is not None for img in linked_imgs):
        # At least one of the watched rows has a bookable seat.
        print("좌석 있음")
        if not seat_was_available:
            post_message(token, channel, "좌석 생김")
        seat_was_available = True
    elif all(img is not None for img in bare_imgs):
        # Both watched rows show the sold-out image.
        print("좌석 없음")
        seat_was_available = False
    else:
        # Neither pattern matched: the response is probably not the expected
        # result page. Report it instead of looping silently.
        # NOTE(review): selectors copied from browser devtools include
        # `tbody`, which browsers insert automatically but html.parser does
        # not — confirm the raw HTML really contains <tbody>.
        print(f"결과 테이블을 찾지 못함 (HTTP {response.status_code})")
        seat_was_available = False

이렇게 했는데, 전송한 데이터 자체가 잘못됐는지 중간에 print(html)로 확인해 보니 해당 페이지를 제대로 가져온 것 같지도 않더라고요.

인프런 커뮤니티 질문&답변