작성
·
344
0
네이버 IT 뉴스 링크를 그냥 크롬이나 사파리를 통해서 접속하면 정상적으로 잘 접속이 됩니다.
하지만 requests.get을 사용하면 에러가 발생해 파싱을 정상적으로 하기 힘든 상황입니다.ㅠ
셀레니움을 사용하든지 다른 뉴스 페이지를 사용해야 할 것 같습니다.
저의 경우는 구글뉴스를 파싱 했습니다.
좋은 강의 해주셔서 감사합니다!
import re
import requests
from bs4 import BeautifulSoup
# Naver search result page for "서울 날씨" (Seoul weather); query is URL-encoded.
weather_url = "https://search.naver.com/search.naver?sm=tab_hty.top&where=nexearch&query=%EC%84%9C%EC%9A%B8+%EB%82%A0%EC%94%A8&oquery=%EC%84%9C%EC%9A%B8+%EB%82%A0%EC%8B%9C&tqi=h%2B4nksprvTVssQv3%2BkdssssstAZ-264690"
# Google News search results ("구글뉴스"); used instead of Naver news, which
# blocks plain requests.get (see the question above).
news_url = "https://www.google.com/search?q=%EA%B5%AC%EA%B8%80%EB%89%B4%EC%8A%A4&rlz=1C5CHFA_enKR979KR979&source=lnms&tbm=nws&sa=X&ved=2ahUKEwio8ZP_jIP9AhXnm1YBHfaRBAUQ_AUoAXoECAEQAw&biw=1057&bih=976&dpr=1"
# Hackers "daily English conversation" page.
english_url = "https://www.hackers.co.kr/?c=s_eng/eng_contents/I_others_english&keywd=haceng_submain_lnb_eng_I_others_english&logger_kw=haceng_submain_lnb_eng_I_others_english"
# Browser-like headers so the sites serve the same markup a real Chrome
# session would get (some of them reject the default python-requests agent).
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
    "Accept-Language":"ko-KR,ko;q=0.9,en-US;q=0.8,en;q=0.7"
}
def create_soup(url, timeout=10):
    """Download *url* and return it parsed as a BeautifulSoup document.

    Args:
        url: Page to fetch; module-level ``headers`` are sent so the sites
            serve browser-grade markup.
        timeout: Seconds before the request is aborted. Without a timeout,
            ``requests.get`` can block forever on an unresponsive server.

    Returns:
        BeautifulSoup tree built with the ``lxml`` parser.

    Raises:
        requests.HTTPError: If the response status is not 2xx.
        requests.Timeout: If the server does not respond within *timeout*.
    """
    res = requests.get(url, headers=headers, timeout=timeout)
    res.raise_for_status()
    return BeautifulSoup(res.text, "lxml")
def get_weather_info():
    """Scrape the Naver weather card for Seoul and print a short report."""
    soup = create_soup(weather_url)

    # Last whitespace-separated token of the temperature box, minus its
    # first two characters (label prefix baked into the markup).
    temp_text = soup.find("div", attrs={"class": "temperature_text"}).get_text()
    current_temp = temp_text.strip().split()[-1][2:]

    compare = soup.find("p", attrs={"class": "summary"}).get_text()
    summary_list = soup.find("dl", attrs={"class": "summary_list"}).get_text().strip()

    # First two "level1" items are fine dust and ultra-fine dust.
    dust_items = soup.find_all("li", attrs={"class": "item_today level1"})
    dust = dust_items[0].get_text().strip()
    micro_dust = dust_items[1].get_text().strip()

    uv = soup.find("li", attrs={"class": "item_today level2"}).get_text().strip()
    sunset = soup.find("li", attrs={"class": "item_today type_sun"}).get_text().strip()

    # Morning / afternoon precipitation probability from today's week item.
    today_item = soup.find("li", attrs={"class": "week_item today"})
    left_spans = today_item.find_all("span", attrs={"class": "weather_left"})
    rain_ratio_am = left_spans[0].get_text().strip()
    rain_ratio_pm = left_spans[1].get_text().strip()

    print("현재 기온 : {}".format(current_temp))
    print(compare)
    print(summary_list)
    print("{} / {} / {} / {}".format(dust, micro_dust, uv, sunset))
    print("강수 확률 : {} / {}\n".format(rain_ratio_am, rain_ratio_pm))
def get_news_headline():
    """Print up to six Google News headlines, each followed by its link.

    Scrapes the Google News search results page (``news_url``); headline
    text and anchor elements are matched by their current CSS class names,
    which Google may change at any time.
    """
    soup = create_soup(news_url)
    headlines = soup.find_all("div", attrs={"class": "mCBkyc ynAwRc MBeuO nDgy9d"}, limit=6)
    urls = soup.find_all("a", attrs={"class": "WlydOe"}, limit=6)
    # zip() stops at the shorter sequence, so a page that yields fewer
    # anchors than headline divs no longer raises IndexError.
    for headline, link in zip(headlines, urls):
        print(headline.get_text())
        print(link["href"] + "\n")
def get_eng_sentence():
    """Print today's conversation phrases (Korean first, then English).

    The Hackers page lists each sentence twice under ids matching
    ``conv_kor_t<n>``: the first half of the matches are the Korean
    sentences, the second half their English translations.
    """
    soup = create_soup(english_url)
    expression = soup.find_all("b", attrs={"class": "conv_txtTitle"})
    # Raw string: "\d" in a plain literal is an invalid escape sequence
    # (DeprecationWarning, and a SyntaxWarning on Python 3.12+).
    sentences = soup.find_all("div", attrs={"id": re.compile(r"conv_kor_t\d*")})
    half = len(sentences) // 2  # boundary between Korean and English halves
    print("\n< 오늘의 영어 회화 >")
    print("한글 표현")
    print("* " + expression[0].get_text())
    for sentence in sentences[:half]:
        print(sentence.get_text().strip())
    print("\n영어 표현")
    print("* " + expression[1].get_text())
    for sentence in sentences[half:]:
        print(sentence.get_text().strip())
def main():
    """Run the daily briefing: weather, news headlines, English phrases."""
    for task in (get_weather_info, get_news_headline, get_eng_sentence):
        task()


if __name__ == "__main__":
    main()
답변