작성
·
161
0
안녕하세요. 샘
불필요한 div, p 삭제 코드 삽입 후 에러 발생 건입니다.
import requests
from bs4 import BeautifulSoup
import time
# Request headers: present the crawler as a regular desktop browser.
req_header_dict = {
    'user-agent':
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/97.0.4692.99 Safari/537.36'
}

# Seconds to wait for a server before giving up; without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# Fetch the Naver news search result page for the (URL-encoded) query.
response = requests.get(
    "https://search.naver.com/search.naver?where=news&sm=tab_jum&query=%EC%86%90%ED%9D%A5%EB%AF%BC",
    headers=req_header_dict,
    timeout=REQUEST_TIMEOUT,
)
html = response.text
soup = BeautifulSoup(html, "html.parser")
articles = soup.select("div.info_group")  # one "info_group" div per search result

for article in articles:
    links = article.select("a.info")  # select() returns a list
    # Only results that carry a second "a.info" link are crawled; that second
    # link is the one used as the article URL.
    if len(links) < 2:
        continue

    url = links[1].attrs["href"]
    # Use distinct names so the search-page response/soup are not overwritten.
    article_response = requests.get(
        url, headers=req_header_dict, timeout=REQUEST_TIMEOUT
    )
    article_soup = BeautifulSoup(article_response.text, "html.parser")

    # The page layout (and therefore the selectors) depends on which section
    # the final, post-redirect URL belongs to.
    strip_embedded_tags = False
    if "entertain" in article_response.url:
        # Entertainment news
        title = article_soup.select_one(".end_tit")
        content = article_soup.select_one("#articeBody")
    elif "sports" in article_response.url:
        # Sports news
        title = article_soup.select_one("h4.title")
        content = article_soup.select_one("#newsEndContents")
        # The sports body also contains div/p elements that are not wanted
        # in the printed text; they are removed below once we know the body
        # was actually found.
        strip_embedded_tags = True
    else:
        # Regular news
        title = article_soup.select_one(".tit.title_area")
        content = article_soup.select_one("#newsct_article")

    # select_one() returns None when the selector matches nothing (for example
    # after a markup change). Skip such pages instead of crashing with
    # AttributeError on `.select` / `.text`.
    if title is None or content is None:
        print("[skip] title or body not found:", article_response.url)
    else:
        if strip_embedded_tags:
            # Remove unnecessary div and p elements from the body.
            for tag in content.select("div, p"):
                tag.decompose()
        print("##########링크##########", url)
        print("##########제목##########", title.text.strip())
        print("##########본문##########", content.text.strip())

    # Pause briefly between article requests to avoid hammering the server.
    time.sleep(0.3)