작성
·
657
0
안녕하세요
파이썬 입문과 크롤링 기초 부트캠프 수강 중입니다.
Beautiful Soup 명령(크롤링) 실행 중 오류가 발생하여 문의드립니다.
beautiful soup 명령 실행 시 아래와 같은 오류가 발생하네요.
(아나콘다 및 bs 재설치 해도 동일 문제 발생)
개인 노트북으로 학습할 때는 잘되는데 회사 PC로 하니 이런 오류가 발생하네요.
사정상 회사 PC로도 학습을 해야 하는데 난감하네요. 도움 부탁드립니다.
[명령문]
import requests
from bs4 import BeautifulSoup
res = requests.get('https://davelee-fun.github.io/blog/crawl_test_css.html')
soup = BeautifulSoup(res.content, 'html.parser')
[오류내용]
SSLCertVerificationError Traceback (most recent call last)
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:714, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
713 # Make the request on the httplib connection object.
--> 714 httplib_response = self._make_request(
715 conn,
716 method,
717 url,
718 timeout=timeout_obj,
719 body=body,
720 headers=headers,
721 chunked=chunked,
722 )
724 # If we're going to release the connection in ``finally:``, then
725 # the response doesn't need to know about the connection. Otherwise
726 # it will also try to release it and we'll have a double-release
727 # mess.
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:403, in HTTPConnectionPool._make_request(self, conn, method, url, timeout, chunked, **httplib_request_kw)
402 try:
--> 403 self._validate_conn(conn)
404 except (SocketTimeout, BaseSSLError) as e:
405 # Py2 raises this as a BaseSSLError, Py3 raises it as socket timeout.
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:1053, in HTTPSConnectionPool._validate_conn(self, conn)
1052 if not getattr(conn, "sock", None): # AppEngine might not have `.sock`
-> 1053 conn.connect()
1055 if not conn.is_verified:
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connection.py:419, in HTTPSConnection.connect(self)
417 context.load_default_certs()
--> 419 self.sock = ssl_wrap_socket(
420 sock=conn,
421 keyfile=self.key_file,
422 certfile=self.cert_file,
423 key_password=self.key_password,
424 ca_certs=self.ca_certs,
425 ca_cert_dir=self.ca_cert_dir,
426 ca_cert_data=self.ca_cert_data,
427 server_hostname=server_hostname,
428 ssl_context=context,
429 tls_in_tls=tls_in_tls,
430 )
432 # If we're using all defaults and the connection
433 # is TLSv1 or TLSv1.1 we throw a DeprecationWarning
434 # for the host.
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\util\ssl_.py:449, in ssl_wrap_socket(sock, keyfile, certfile, cert_reqs, ca_certs, server_hostname, ssl_version, ciphers, ssl_context, ca_cert_dir, key_password, ca_cert_data, tls_in_tls)
448 if send_sni:
--> 449 ssl_sock = _ssl_wrap_socket_impl(
450 sock, context, tls_in_tls, server_hostname=server_hostname
451 )
452 else:
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\util\ssl_.py:493, in _ssl_wrap_socket_impl(sock, ssl_context, tls_in_tls, server_hostname)
492 if server_hostname:
--> 493 return ssl_context.wrap_socket(sock, server_hostname=server_hostname)
494 else:
File ~\AppData\Local\anaconda3\Lib\ssl.py:517, in SSLContext.wrap_socket(self, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, session)
511 def wrap_socket(self, sock, server_side=False,
512 do_handshake_on_connect=True,
513 suppress_ragged_eofs=True,
514 server_hostname=None, session=None):
515 # SSLSocket class handles server_hostname encoding before it calls
516 # ctx._wrap_socket()
--> 517 return self.sslsocket_class._create(
518 sock=sock,
519 server_side=server_side,
520 do_handshake_on_connect=do_handshake_on_connect,
521 suppress_ragged_eofs=suppress_ragged_eofs,
522 server_hostname=server_hostname,
523 context=self,
524 session=session
525 )
File ~\AppData\Local\anaconda3\Lib\ssl.py:1108, in SSLSocket._create(cls, sock, server_side, do_handshake_on_connect, suppress_ragged_eofs, server_hostname, context, session)
1107 raise ValueError("do_handshake_on_connect should not be specified for non-blocking sockets")
-> 1108 self.do_handshake()
1109 except (OSError, ValueError):
File ~\AppData\Local\anaconda3\Lib\ssl.py:1379, in SSLSocket.do_handshake(self, block)
1378 self.settimeout(None)
-> 1379 self._sslobj.do_handshake()
1380 finally:
SSLCertVerificationError: [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)
During handling of the above exception, another exception occurred:
MaxRetryError Traceback (most recent call last)
File ~\AppData\Local\anaconda3\Lib\site-packages\requests\adapters.py:486, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
485 try:
--> 486 resp = conn.urlopen(
487 method=request.method,
488 url=url,
489 body=request.body,
490 headers=request.headers,
491 redirect=False,
492 assert_same_host=False,
493 preload_content=False,
494 decode_content=False,
495 retries=self.max_retries,
496 timeout=timeout,
497 chunked=chunked,
498 )
500 except (ProtocolError, OSError) as err:
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:798, in HTTPConnectionPool.urlopen(self, method, url, body, headers, retries, redirect, assert_same_host, timeout, pool_timeout, release_conn, chunked, body_pos, **response_kw)
796 e = ProtocolError("Connection aborted.", e)
--> 798 retries = retries.increment(
799 method, url, error=e, _pool=self, _stacktrace=sys.exc_info()[2]
800 )
801 retries.sleep()
File ~\AppData\Local\anaconda3\Lib\site-packages\urllib3\util\retry.py:592, in Retry.increment(self, method, url, response, error, _pool, _stacktrace)
591 if new_retry.is_exhausted():
--> 592 raise MaxRetryError(_pool, url, error or ResponseError(cause))
594 log.debug("Incremented Retry for (url='%s'): %r", url, new_retry)
MaxRetryError: HTTPSConnectionPool(host='davelee-fun.github.io', port=443): Max retries exceeded with url: /blog/crawl_test_css.html (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)')))
During handling of the above exception, another exception occurred:
SSLError Traceback (most recent call last)
Cell In[3], line 4
1 import requests
2 from bs4 import BeautifulSoup
----> 4 res = requests.get('https://davelee-fun.github.io/blog/crawl_test_css.html')
5 soup = BeautifulSoup(res.content, 'html.parser')
File ~\AppData\Local\anaconda3\Lib\site-packages\requests\api.py:73, in get(url, params, **kwargs)
62 def get(url, params=None, **kwargs):
63 r"""Sends a GET request.
64
65 :param url: URL for the new :class:`Request` object.
(...)
70 :rtype: requests.Response
71 """
---> 73 return request("get", url, params=params, **kwargs)
File ~\AppData\Local\anaconda3\Lib\site-packages\requests\api.py:59, in request(method, url, **kwargs)
55 # By using the 'with' statement we are sure the session is closed, thus we
56 # avoid leaving sockets open which can trigger a ResourceWarning in some
57 # cases, and look like a memory leak in others.
58 with sessions.Session() as session:
---> 59 return session.request(method=method, url=url, **kwargs)
File ~\AppData\Local\anaconda3\Lib\site-packages\requests\sessions.py:589, in Session.request(self, method, url, params, data, headers, cookies, files, auth, timeout, allow_redirects, proxies, hooks, stream, verify, cert, json)
584 send_kwargs = {
585 "timeout": timeout,
586 "allow_redirects": allow_redirects,
587 }
588 send_kwargs.update(settings)
--> 589 resp = self.send(prep, **send_kwargs)
591 return resp
File ~\AppData\Local\anaconda3\Lib\site-packages\requests\sessions.py:703, in Session.send(self, request, **kwargs)
700 start = preferred_clock()
702 # Send the request
--> 703 r = adapter.send(request, **kwargs)
705 # Total elapsed time of the request (approximately)
706 elapsed = preferred_clock() - start
File ~\AppData\Local\anaconda3\Lib\site-packages\requests\adapters.py:517, in HTTPAdapter.send(self, request, stream, timeout, verify, cert, proxies)
513 raise ProxyError(e, request=request)
515 if isinstance(e.reason, _SSLError):
516 # This branch is for urllib3 v1.22 and later.
--> 517 raise SSLError(e, request=request)
519 raise ConnectionError(e, request=request)
521 except ClosedPoolError as e:
SSLError: HTTPSConnectionPool(host='davelee-fun.github.io', port=443): Max retries exceeded with url: /blog/crawl_test_css.html (Caused by SSLError(SSLCertVerificationError(1, '[SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: self-signed certificate in certificate chain (_ssl.c:1006)')))
답변 1
0
구글에서 에러 내용을 검색해서 알아본 조치 사항을 적용해 보니 아래와 같이 나오네요.
사내 보안 같은 것 때문에 안 되는 건지..
import requests
from bs4 import BeautifulSoup
res = requests.get('https://calendar.google.com/calendar/u/0/r/month?pli=1', verify=False)
soup = BeautifulSoup(res.content, 'html.parser')
C:\Users\6804314\AppData\Local\anaconda3\Lib\site-packages\urllib3\connectionpool.py:1056: InsecureRequestWarning: Unverified HTTPS request is being made to host 'calendar.google.com'. Adding certificate verification is strongly advised. See: https://urllib3.readthedocs.io/en/1.26.x/advanced-usage.html#ssl-warnings
warnings.warn(
안녕하세요. 답변 도우미입니다.
답글이 이미 있다고 나와서 문의를 놓쳤습니다. 죄송합니다.
말씀하신 대로, SSL과 관련된 에러로 보이는데요. SSL 관련 에러는 보통 사내 PC를 사용할 때, 사내 PC에 보안 처리가 되어 있어서 발생하는 경우가 많습니다. 이 경우에는 부득이 개인 PC를 사용하셔야 하는데요. 사실 개발용 회사 PC가 아니라면, 개발 관련 작업은 사내 PC에서 다 막아 놓은 회사들이 많아서, 개인 PC로 익혀 보시는 것이 장기적으로도 좋을 것 같습니다.
감사합니다.