Note

인스타그램 크롤링 (8) - 사진 정보 크롤링 본문

etc/Crawling

인스타그램 크롤링 (8) - 사진 정보 크롤링

알 수 없는 사용자 2022. 7. 13. 22:43
728x90
# Scraped fields for every successfully parsed post, keyed by post id.
# Named `post_info` (not `dict`) so the builtin `dict` is not shadowed.
post_info = {}

# Ids of posts whose page could not be loaded or parsed.
failed_ids = []

# Exclusive upper bound (seconds) for the random per-post delay; the lower
# bound is 3, so each delay is an integer in [3, MAX_SLEEP_TIME).
MAX_SLEEP_TIME = 10

# An extra fixed pause is taken before every BATCH_SIZE-th post
# (including the very first one).
BATCH_SIZE = 5
BATCH_PAUSE_SECONDS = 10

# Column names of the resulting DataFrame, in order.
INFO_COLUMNS = ['picture_id', 'insta_id', 'contents', 'like', 'date']

# CSS class strings used to locate each field in the post page.
INSTA_ID_CLASS = 'oajrlxb2 g5ia77u1 qu0x051f esr5mh6w e9989ue4 r7d6kgcz rq0escxv nhd2j8a9 nc684nl6 p7hjln8o kvgmc6g5 cxmmr5t8 oygrvhab hcukyx3x jb3vyjys rz4wbd8a qt6c0cv9 a8nywdso i1ao9s8h esuyzwwr f1sip0of lzcic4wl _acan _acao _acat _acaw _a6hd'
CONTENTS_CLASS = '_aacl _aaco _aacu _aacx _aad7 _aade'
LIKE_CLASS = '_aacl _aaco _aacw _aacx _aada _aade'
DATE_CLASS = '_aaqe'


def _extract_post_info(html):
    """Return the insta_id, contents, like and date fields of a parsed post page.

    `html` is the BeautifulSoup tree of the post page. An exception is raised
    (IndexError / KeyError) when any expected element is missing or has an
    unexpected shape; the caller treats that as a failed post.
    """
    target_info = {}

    # insta id: text of the first anchor carrying the account-link classes.
    insta_id = html.find_all('a', {'class': INSTA_ID_CLASS})[0]
    target_info['insta_id'] = insta_id.get_text()

    # contents: text of the first span carrying the caption classes.
    contents = html.find_all('span', {'class': CONTENTS_CLASS})[0]
    target_info['contents'] = contents.get_text()

    # like: the text '여러 명' is stored as-is; otherwise the second
    # space-separated token is taken and everything from '개' on is dropped.
    like = html.find_all('div', {'class': LIKE_CLASS})[0].get_text()
    if like != '여러 명':
        like = like.split(' ')[1].split('개')[0]
    target_info['like'] = like

    # date: keep only the part of the `datetime` attribute before 'T'.
    date = html.find_all('time', {'class': DATE_CLASS})[0]
    target_info['date'] = date['datetime'].split('T')[0]

    return target_info


for index, post_id in enumerate(tqdm(array_p_id)):

    # Fixed pause before every BATCH_SIZE-th post.
    if index % BATCH_SIZE == 0:
        time.sleep(BATCH_PAUSE_SECONDS)

    try:
        driver.get("https://www.instagram.com/p/" + post_id)

        # Random delay after requesting the page, before reading its source.
        time.sleep(np.random.randint(3, MAX_SLEEP_TIME))

        # HTML of the current page.
        html = bs(driver.page_source, 'html.parser')

        post_info[post_id] = _extract_post_info(html)

    except Exception as exc:
        # Best-effort crawl: skip this post but keep a visible record of it.
        # `Exception` (not a bare except) lets Ctrl-C still stop the loop.
        failed_ids.append(post_id)
        print("skipped post", post_id, "-", repr(exc))

        time.sleep(np.random.randint(3, MAX_SLEEP_TIME))

if post_info:
    info = pd.DataFrame.from_dict(post_info, 'index')
    info = info.reset_index(drop=False)
    info.columns = INFO_COLUMNS
else:
    # Nothing was scraped: assigning five column names to the one-column
    # frame would raise, so build the empty frame with its columns directly.
    info = pd.DataFrame(columns=INFO_COLUMNS)
info
Comments