etc/Crawling
# Visit each Instagram post page and collect its details.
# array_p_id (the list of post IDs) and driver (the Selenium webdriver) are defined in an earlier step.
import time
import numpy as np
from tqdm import tqdm
from bs4 import BeautifulSoup as bs

dict = {}                  # note: shadows the built-in dict
cnt = 0
MAX_SLEEP_TIME = 10
for i in tqdm(array_p_id):
    if cnt % 5 == 0:       # longer pause every 5 posts to avoid being blocked
        time.sleep(10)
    cnt += 1
    target_info = {}
    try:
        driver.get("https://www.instagram.com/p/" + i)
        rand_value = np.random.randint(3, MAX_SLEEP_TIME)   # random 3-9 second wait
        time.sleep(rand_value)
        body = driver.find_element_by_tag_name('body')
        html0 = driver.page_source        # HTML of the current page
        html = bs(html0, 'html.parser')
        # Instagram user ID
        insta_id = html.find..
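The preview cuts off at the username lookup. Below is a minimal, self-contained sketch of that step, assuming the poster's username sits in an anchor inside the post header; the class name HEADER_LINK_CLASS is a placeholder to be replaced with whatever the live page source shows, not Instagram's real markup.

from bs4 import BeautifulSoup

def extract_insta_id(page_source, header_link_class="HEADER_LINK_CLASS"):
    """Return the poster's username from a post page, or None if not found."""
    soup = BeautifulSoup(page_source, "html.parser")
    # Assumption: the username is the text of the first header link with the given class.
    tag = soup.find("a", {"class": header_link_class})
    return tag.get_text(strip=True) if tag else None

# usage inside the loop above:
# target_info["insta_id"] = extract_insta_id(driver.page_source)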
# Wait a random 3-9 seconds between requests.
max_sleep = 10
rand_time = np.random.randint(3, max_sleep)
time.sleep(rand_time)
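The same random pause shows up in several snippets, so it can be wrapped in a small helper; polite_sleep is not from the original posts, just a convenience sketch.

import time
import numpy as np

def polite_sleep(min_s=3, max_s=10):
    """Sleep a random whole number of seconds in [min_s, max_s) to mimic human pacing."""
    time.sleep(int(np.random.randint(min_s, max_s)))

# polite_sleep() can replace the three lines above wherever the crawler pauses.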
# Scroll the results page and gather the <img> tags that load in.
# driver is assumed to already be on the target Instagram page.
import pandas as pd
from selenium.webdriver.common.keys import Keys

df = pd.DataFrame()
ccnt = 0
for j in range(10):
    if ccnt % 3 == 0:          # pause every 3 passes
        time.sleep(10)
    ccnt += 1
    body = driver.find_element_by_tag_name('body')
    num_of_pagedowns = 5
    while num_of_pagedowns:    # press PAGE_DOWN 5 times to load more posts
        body.send_keys(Keys.PAGE_DOWN)
        time.sleep(3)
        num_of_pagedowns -= 1
    html0 = driver.page_source            # HTML of the current page
    html = bs(html0, 'html.parser')
    picture_info = {}
    cnt = 0
    for i in tqdm(range(0, len(html.find_all('img', {'c..
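The preview is cut off inside the find_all call. A self-contained sketch of what the image-collection step might look like, assuming the goal is to record each thumbnail's src URL; the optional class filter is a placeholder, since Instagram's class names are obfuscated and change often.

from bs4 import BeautifulSoup

def collect_image_urls(page_source, img_class=None):
    """Return the src of every <img> on the page, optionally filtered by class."""
    soup = BeautifulSoup(page_source, "html.parser")
    attrs = {"class": img_class} if img_class else {}
    return [img.get("src") for img in soup.find_all("img", attrs) if img.get("src")]

# usage after each scroll pass:
# picture_urls = collect_image_urls(driver.page_source)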
# Download the collected images into the image_insta folder.
import os
import urllib.request

# Create the folder if it does not exist yet.
if not os.path.exists("image_insta"):
    os.makedirs("image_insta")

for i in range(0, len(result_df['picture'])):
    try:
        index = result_df['picture'][i]    # image URL
        date = result_df['date'][i]        # posting date (kept for reference)
        urllib.request.urlretrieve(index, "image_insta/{0}_{1}.jpg".format(keyword, i))
    except:
        pass                               # skip images that fail to download
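urlretrieve has no timeout, so one stalled download can hang the whole loop. A hedged alternative (not part of the original post) uses requests with a timeout and reports failures instead of silently passing.

import os
import requests

def download_image(url, path, timeout=10):
    """Download one image to path; return True on success, False otherwise."""
    try:
        resp = requests.get(url, timeout=timeout)
        resp.raise_for_status()
        with open(path, "wb") as f:
            f.write(resp.content)
        return True
    except requests.RequestException:
        return False

# usage, mirroring the loop above:
# os.makedirs("image_insta", exist_ok=True)
# download_image(result_df['picture'][i], "image_insta/{0}_{1}.jpg".format(keyword, i))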
# Instagram picture_id
# Post URLs have the form www.instagram.com/p/abcd/ ; extract "abcd".
try:
    overlays5 = 'div._ab8w._ab94._ab99._ab9f._ab9k._ab9o > div > div > a'
    picture_id = driver.find_element_by_css_selector(overlays5)
    picture_id = picture_id.get_attribute('href')
    picture_id = picture_id.split('/')[4]
except:
    try:
        overlays5 = 'div._ab8w._ab94._ab99._ab9f._ab9k._ab9o > div > div > div > a'
        picture_id = driver.find_element_..
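split('/')[4] assumes the href always has the exact https://www.instagram.com/p/<id>/ shape. A slightly more defensive sketch (not in the original) parses the URL path instead, so relative links or trailing segments don't break the index.

from urllib.parse import urlparse

def extract_picture_id(href):
    """Return the <id> part of an instagram.com/p/<id>/ URL, or None if the shape differs."""
    parts = [p for p in urlparse(href).path.split("/") if p]
    # Expect a path like ['p', '<id>', ...]; anything else is treated as not a post link.
    if len(parts) >= 2 and parts[0] == "p":
        return parts[1]
    return None

# usage:
# picture_id = extract_picture_id(element.get_attribute('href'))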