728x90
반응형
SMALL
📍 인스타그램 크롤링 실습
from selenium import webdriver
import chromedriver_autoinstaller
from selenium.webdriver.common.by import By
import time
# Download (if necessary) a chromedriver that matches the installed Chrome and
# put it on PATH, so webdriver.Chrome() does not fail on a version mismatch.
chromedriver_autoinstaller.install()
driver = webdriver.Chrome()
📌 에러 해결법
1. 크롬의 버전을 확인
2. 크롬 드라이버를 크롬버전에 맞게 다운로드
https://chromedriver.chromium.org/downloads
👉 현재 크롬 버전에 맞는 크롬 드라이버가 없을 경우, 드라이버가 제공되는 버전으로 크롬을 다운그레이드하기!!
https://google-chrome.en.uptodown.com/windows/versions
1) 위 사이트에서 원하는 버전의 크롬을 다운로드
2) 현재 설치되어있는 크롬을 제거
3) 다운로드 받은 구버전의 크롬을 설치
4) 자동업데이트 끄기 or 제거 후 새버전 크롬 다운하기
1. 로그인
# Implicit wait: every element lookup on this driver waits up to 3 seconds
# and proceeds as soon as the element becomes available.
# E.g. if the element shows up after 1 second, the lookup only waits 1 second.
driver.implicitly_wait(3)  # upper bound for element lookups, not a fixed delay
url = 'https://www.instagram.com/'
driver.get(url)
# Credentials of the account to log in with (masked here).
id = '******@naver.com'  # your own account e-mail
pw = '******'  # your own account password

# The e-mail field, password field and log-in button all sit under the same
# form node; only the tail of the absolute XPath differs, so each locator is
# built from the shared prefix.
login_form = '/html/body/div[2]/div/div/div[1]/div/div/div/div[1]/section/main/article/div[2]/div[1]/div[2]/form/div'
input_id = driver.find_element(By.XPATH, login_form + '/div[1]/div/label/input')
input_pw = driver.find_element(By.XPATH, login_form + '/div[2]/div/label/input')

# Type the credentials, then click the log-in button.
input_id.send_keys(id)
input_pw.send_keys(pw)
driver.find_element(By.XPATH, login_form + '/div[3]/button/div').click()

# Fixed pause after submitting (presumably to let the login finish).
time.sleep(5)
XPath 복사하기
2. 해시태그 검색
# Open the explore page of a single hashtag.
hashtag = '술스타그램'  # tag text interpolated into the URL below
url = f'https://www.instagram.com/explore/tags/{hashtag}/'
driver.get(url)
time.sleep(5)  # fixed pause after navigating
3. 스크롤 내리면서 새로운 게시물 보기
# Scroll to the bottom of the page five times to keep seeing new posts.
scroll_to_bottom = 'window.scrollTo(0, document.body.scrollHeight)'
for _ in range(5):
    # execute_script runs the given JavaScript inside the page.
    driver.execute_script(scroll_to_bottom)
    time.sleep(5)  # pause after each scroll
4. 원하는 게시글 클릭하기
5. 좋아요 누르기
# Click the like control located by an absolute XPath.
# NOTE(review): assumes a post has already been opened so that this path
# exists in the page — confirm against the current page layout.
like_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[3]/div/div/section[1]/span[1]/div/div/span'
driver.find_element(By.XPATH, like_xpath).click()
time.sleep(1)  # short fixed pause after the click
6. 댓글 달기
# Type a comment into the comment textarea and submit it.
comment = '사진 잘 보고갑니다!'
comment_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[3]/div/div/section[3]/div/form/div/textarea'
# Alternative absolute path kept for reference (presumably another page layout):
# comment_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/textarea'
driver.find_element(By.XPATH, comment_xpath).click() # the comment box must be clicked once before text can be entered
# The textarea is looked up again after the click before typing into it.
driver.find_element(By.XPATH, comment_xpath).send_keys(comment)
time.sleep(3)
# NOTE(review): send_xpath goes through article/div/div[2]/div/div/div[2]
# while comment_xpath goes through article/div/div[3]/div/div, although both
# end in section[3]/div/form/div — confirm they target the same form.
send_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/div[2]/div'
driver.find_element(By.XPATH, send_xpath).click()
time.sleep(3)  # fixed pause after submitting
7. 다음 게시글 누르기
# Click the control that moves on to the next post.
next_xpath = '/html/body/div[7]/div[1]/div/div[3]/div/div/div/div/div[1]/div/div/div[2]/button/div/span'
driver.find_element(By.XPATH, next_xpath).click()
time.sleep(3)  # fixed pause after the click
8. 함수로 리팩토링
# Log in
def login(id, pw):
    """Fill in the login form on the current page and submit it.

    Uses the module-level ``driver``, which must already show the page that
    contains the login form.

    Args:
        id: Text typed into the first input of the form (account e-mail).
        pw: Text typed into the second input of the form (password).
    """
    # Anchor on the form itself and address its controls relative to it.
    # The previous absolute paths disagreed on the wrapper above the form
    # (div[2] for the inputs, div[1] for the button), so at least one of them
    # could not match; the part below <form> was the same in all of them.
    form = driver.find_element(By.XPATH, '//article//form')
    input_id = form.find_element(By.XPATH, './div/div[1]/div/label/input')
    input_pw = form.find_element(By.XPATH, './div/div[2]/div/label/input')

    input_id.send_keys(id)
    input_pw.send_keys(pw)
    form.find_element(By.XPATH, './div/div[3]/button').click()

    # Pause after submitting: without it the caller's next driver.get() can
    # navigate away before the login has gone through.
    time.sleep(5)
# Hashtag search
def search(hashtag, scroll_times):
    """Open the explore page of a hashtag and scroll down repeatedly.

    Uses the module-level ``driver``.

    Args:
        hashtag: Tag text interpolated into the explore URL.
        scroll_times: How many times to scroll to the bottom of the page.
    """
    driver.get(f'https://www.instagram.com/explore/tags/{hashtag}/')
    time.sleep(5)

    jump_to_bottom = 'window.scrollTo(0, document.body.scrollHeight)'
    for _ in range(scroll_times):
        driver.execute_script(jump_to_bottom)
        time.sleep(5)  # pause after each scroll
# Like and comment (index of the photo to open, comment text, repeat count)
def like_and_comment(nth, comment, repeat=1):
    """Open the nth thumbnail, then like and comment on consecutive posts.

    Uses the module-level ``driver``. The thumbnail is addressed as row/column
    of a 3-column grid, counted row by row.

    Args:
        nth: 1-based position of the thumbnail to open.
        comment: Text typed into the comment box of every processed post.
        repeat: Number of posts to like and comment on; after each post except
            the last one the "next" button is clicked.

    Raises:
        ValueError: If ``nth`` is smaller than 1 (it would otherwise yield an
            XPath index of 0 or below and only fail after the lookup timeout).
    """
    if nth < 1:
        raise ValueError(f'nth must be >= 1, got {nth!r}')

    # Convert the 1-based position into 1-based row/column of the grid.
    row_index, col_index = divmod(nth - 1, 3)
    row = row_index + 1
    col = col_index + 1
    xpath = f'/html/body/div[2]/div/div/div[2]/div/div/div/div[1]/div[1]/div[2]/section/main/article/div[2]/div/div[{row}]/div[{col}]/a/div'
    driver.find_element(By.XPATH, xpath).click()
    time.sleep(3)

    # Locators of the opened post view; they do not change between iterations.
    like_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[1]/span[1]/button'
    comment_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/textarea'
    send_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[2]/div/article/div/div[2]/div/div/div[2]/section[3]/div/form/div/div[2]/div'
    next_xpath = '/html/body/div[2]/div/div/div[3]/div/div/div[1]/div/div[3]/div/div/div/div/div[1]/div/div/div[2]/button'

    for i in range(repeat):
        driver.find_element(By.XPATH, like_xpath).click()
        time.sleep(2)

        # Click the comment box first, then look it up again before typing.
        driver.find_element(By.XPATH, comment_xpath).click()
        driver.find_element(By.XPATH, comment_xpath).send_keys(comment)
        time.sleep(3)

        driver.find_element(By.XPATH, send_xpath).click()
        time.sleep(3)

        # Move on to the next post unless this was the last one.
        if i + 1 < repeat:
            driver.find_element(By.XPATH, next_xpath).click()
            time.sleep(3)
import os

# Start a browser session and open the Instagram start page.
driver = webdriver.Chrome()
driver.implicitly_wait(3)  # element lookups wait up to 3 seconds
url = 'https://www.instagram.com/'
driver.get(url)

# Account credentials are read from the environment instead of being
# hard-coded, so no real password is stored in the source file.
# A missing variable raises KeyError before anything is typed into the page.
id = os.environ['INSTAGRAM_ID']
pw = os.environ['INSTAGRAM_PW']
login(id, pw)

# Open the hashtag page and scroll three times.
hashtag = '술스타그램'
search(hashtag, 3)
time.sleep(5)

# Open the 7th thumbnail, then like and comment on 3 consecutive posts.
like_and_comment(7, '사진 잘 보고 갑니다!', 3)
728x90
반응형
LIST
'Python > Crawlling' 카테고리의 다른 글
[파이썬, Python] 이미지 크롤링 하기! (0) | 2023.09.05 |
---|---|
[파이썬, Python] 셀레니움(Selenium) 라이브러리를 활용한 브라우저 컨트롤링 (0) | 2023.09.05 |
[파이썬, Python] 크롤링(Crawlling)의 정의 & 크롤링 실습하기! (0) | 2023.09.04 |