📍 인스타그램 크롤링 실습
from selenium import webdriver
import chromedriver_autoinstaller
from selenium.webdriver.common.by import By
import time
# Start a Chrome session; `driver` is the handle every later step uses.
driver = webdriver.Chrome()
📌 에러 해결법
1. 크롬의 버전을 확인
2. 크롬 드라이버를 크롬버전에 맞게 다운로드
https://chromedriver.chromium.org/downloads
ChromeDriver - WebDriver for Chrome - Downloads
Current Releases If you are using Chrome version 115 or newer, please consult the Chrome for Testing availability dashboard. This page provides convenient JSON endpoints for specific ChromeDriver version downloading. For older versions of Chrome, please se
chromedriver.chromium.org
👉 현재 크롬 버전에 맞는 드라이버가 없을 경우 크롬 버전을 다운그레이드하기!!
https://google-chrome.en.uptodown.com/windows/versions
Older versions of Google Chrome (Windows) | Uptodown
exe 118.0.5993.71 Oct 11, 2023 exe 117.0.5938.150 Oct 4, 2023 exe 117.0.5938.92 Sep 22, 2023 exe 117.0.5938.89 Sep 18, 2023 zip 117.0.5938.63 Sep 15, 2023 exe 116.0.5845.188 Sep 13, 2023 exe 116.0.5845.141 Sep 1, 2023 exe 116.0.5845.97 Aug 16, 2023 exe 115
google-chrome.en.uptodown.com
1) 위 사이트에서 원하는 버전의 크롬을 다운로드
2) 현재 설치되어있는 크롬을 제거
3) 다운로드 받은 구버전의 크롬을 설치
4) 자동 업데이트 끄기, 또는 구버전 제거 후 최신 버전 크롬을 다시 다운로드하기
1. 로그인
# Implicit wait: every element lookup on this driver waits for up to 3 seconds
# and continues as soon as the element is available (for example after 1 second
# if that is all the page needs) instead of always waiting the full 3 seconds.
driver.implicitly_wait(3)

url = 'https://www.instagram.com/'
driver.get(url)

# Named user_id rather than id so the builtin id() is not shadowed.
user_id = '******@naver.com'  # your own account e-mail
pw = '******'  # your own account password

# The e-mail input, password input and submit button share one absolute XPath
# prefix; only their position inside the form differs.
login_form_xpath = '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div'
input_id = driver.find_element(By.XPATH, f'{login_form_xpath}/div[1]/div/label/input')
input_pw = driver.find_element(By.XPATH, f'{login_form_xpath}/div[2]/div/label/input')

input_id.send_keys(user_id)
input_pw.send_keys(pw)
driver.find_element(By.XPATH, f'{login_form_xpath}/div[3]/button/div').click()

# Fixed pause after submitting; presumably gives the login time to complete.
time.sleep(5)
XPath 복사하기
2. 해시태그 검색
# Navigate to the explore page of the chosen hashtag.
hashtag = '술스타그램'
url = f'https://www.instagram.com/explore/tags/{hashtag}/'
driver.get(url)
time.sleep(5)  # fixed 5-second pause after navigating
3. 스크롤 내리면서 새로운 게시물 보기
# Scroll to the bottom of the page five times so that new posts keep appearing.
for _ in range(5):
    # execute_script runs the given JavaScript inside the page.
    driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
    time.sleep(5)  # pause between scrolls
4. 원하는 게시글 클릭하기
5. 좋아요 누르기
# Absolute XPath used to locate the like button.
like_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[3]/div/div/section[1]/span[1]/div/div/span'

# Look the element up first, then click it, and pause for a second.
like_button = driver.find_element(By.XPATH, like_xpath)
like_button.click()
time.sleep(1)
6. 댓글 달기
comment = '사진 잘 보고갑니다!'

# Absolute XPaths of the comment textarea and of the button that posts it.
# NOTE(review): below `article`, comment_xpath goes through
# div/div[3]/div/div/section[3] while send_xpath goes through
# div/div[2]/div/div/div[2]/section[3] - confirm both against the live page.
comment_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[3]/div/div/section[3]/div/form/div/textarea'
# Alternative that was left commented out:
# comment_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/textarea'
send_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/div[2]/div'

# The comment box has to be clicked once before text can be typed into it.
comment_box = driver.find_element(By.XPATH, comment_xpath)
comment_box.click()

# The textarea is looked up a second time before typing, exactly as before.
comment_box = driver.find_element(By.XPATH, comment_xpath)
comment_box.send_keys(comment)
time.sleep(3)

send_button = driver.find_element(By.XPATH, send_xpath)
send_button.click()
time.sleep(3)
7. 다음 게시글 누르기
# Absolute XPath used to locate the "next post" control.
next_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[1]/div/div/div[2]/button/div/span'

# Look the element up first, then click it, and pause for three seconds.
next_button = driver.find_element(By.XPATH, next_xpath)
next_button.click()
time.sleep(3)
8. 함수로 리팩토링
# Log in
def login(id, pw):
    """Type the credentials into the login form and submit it.

    Works on the module-level ``driver``; the login form must already be
    loaded in it, because the elements are located by absolute XPath.

    Args:
        id: Account e-mail typed into the first input of the form.
        pw: Password typed into the second input of the form.
    """
    # NOTE(review): the two input XPaths start with
    # /html/body/div[2]/div/div/div[2]/... while the button XPath starts with
    # /html/body/div[2]/div/div/div[1]/... - confirm against the live page.
    input_id = driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[1]/div/label/input')
    input_pw = driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[2]/div/label/input')

    input_id.send_keys(id)
    input_pw.send_keys(pw)
    driver.find_element(By.XPATH, '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div/div[3]/button').click()

    # Pause after submitting so the login can finish before the caller
    # navigates to another page.
    time.sleep(5)
# Hashtag search
def search(hashtag, scroll_times):
    """Open the explore page of a hashtag and scroll down it repeatedly.

    Works on the module-level ``driver``.

    Args:
        hashtag: Tag name (without ``#``) inserted into the explore URL.
        scroll_times: How many times to scroll to the bottom of the page.
    """
    url = f'https://www.instagram.com/explore/tags/{hashtag}/'
    driver.get(url)
    time.sleep(5)  # fixed pause after navigating

    for _ in range(scroll_times):
        driver.execute_script('window.scrollTo(0, document.body.scrollHeight)')
        time.sleep(5)  # pause between scrolls
# Like and comment (index of the photo to pick, comment text, repeat count)
def like_and_comment(nth, comment, repeat=1):
    """Open the nth post of the grid, then like and comment on posts.

    Works on the module-level ``driver``. The nth post is clicked first; then,
    ``repeat`` times, the like button is clicked and ``comment`` is posted.
    After every post except the last one the "next" button is clicked.

    Args:
        nth: 1-based position of the post to open, counted row by row in a
            grid that is three posts wide.
        comment: Text typed into the comment box of each post.
        repeat: Number of consecutive posts to like and comment on.

    Raises:
        ValueError: If ``nth`` is smaller than 1, which would otherwise
            produce an XPath with row index 0.
    """
    if nth < 1:
        raise ValueError(f'nth must be 1 or greater, got {nth!r}')

    # Convert the 1-based position into 1-based (row, column) of a 3-wide grid.
    row_index, col_index = divmod(nth - 1, 3)
    row, col = row_index + 1, col_index + 1
    xpath = f'/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main/article/div[2]/div/div[{row}]/div[{col}]/a/div'
    driver.find_element(By.XPATH, xpath).click()
    time.sleep(3)

    # The XPaths do not change between posts, so they are defined once.
    like_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[1]/span[1]/button'
    comment_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/textarea'
    send_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/div[2]/div'
    next_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[1]/div/div/div[2]/button'

    for i in range(repeat):
        driver.find_element(By.XPATH, like_xpath).click()
        time.sleep(2)

        # Click the comment box first, then look it up again to type.
        driver.find_element(By.XPATH, comment_xpath).click()
        driver.find_element(By.XPATH, comment_xpath).send_keys(comment)
        time.sleep(3)

        driver.find_element(By.XPATH, send_xpath).click()
        time.sleep(3)

        # Move on only when another post is still to be processed.
        if i + 1 < repeat:
            driver.find_element(By.XPATH, next_xpath).click()
            time.sleep(3)
import os  # imported here because this block is the script's entry point

# Credentials are read from the environment instead of being hard-coded, so
# real account details never end up in the source. Set INSTAGRAM_ID and
# INSTAGRAM_PW before running; a missing variable raises KeyError before any
# browser is started.
user_id = os.environ['INSTAGRAM_ID']
user_pw = os.environ['INSTAGRAM_PW']

driver = webdriver.Chrome()
driver.implicitly_wait(3)  # element lookups wait for up to 3 seconds

url = 'https://www.instagram.com/'
driver.get(url)

# Passed positionally; the names avoid shadowing the builtin id().
login(user_id, user_pw)

hashtag = '술스타그램'
search(hashtag, 3)
time.sleep(5)

# Start at the 7th post and handle 3 posts in a row.
like_and_comment(7, '사진 잘 보고 갑니다!', 3)
'Python > Crawlling' 카테고리의 다른 글
[파이썬, Python] 이미지 크롤링 하기! (0) | 2023.09.05 |
---|---|
[파이썬, Python] 셀레니움(Selenium) 라이브러리를 활용한 브라우저 컨트롤링 (0) | 2023.09.05 |
[파이썬, Python] 크롤링(Crawlling)의 정의 & 크롤링 실습하기! (0) | 2023.09.04 |