import os

from selenium import webdriver
from selenium.webdriver.common.keys import Keys


def _cell_text(driver, xpath):
    """Return the visible text of the single element matched by *xpath*."""
    return driver.find_element_by_xpath(xpath).text


def yah(sym):
    """Scrape Yahoo Finance key statistics and holder data for one ticker.

    A fresh Chrome session is started for the call and is always shut down
    before returning, including when a page load or element lookup raises.

    Args:
        sym: Ticker symbol inserted into the Yahoo Finance quote URLs.

    Returns:
        A 6-tuple ``(cash, debt, ebitda, beta, inside, inst)``. The first
        three values are the scraped cell text passed through ``mb``, the
        fourth through ``bet`` and the last two through ``pct``.

    Raises:
        Whatever Selenium or the parsing helpers raise when a page or table
        cell is missing or cannot be parsed.
    """
    # NOTE(review): mb / bet / pct are defined outside this block; presumably
    # they convert "1.2B"-style amounts, beta values and percentages to
    # numbers -- confirm against their definitions.
    # NOTE(review): find_element_by_xpath and the positional driver path were
    # dropped in Selenium 4 -- confirm the pinned selenium version.
    chromedriver = "/Applications/chromedriver"
    os.environ["webdriver.chrome.driver"] = chromedriver
    driver = webdriver.Chrome(chromedriver)

    key = 'http://finance.yahoo.com/quote/' + sym + '/key-statistics?p=' + sym
    hold = 'http://finance.yahoo.com/quote/' + sym + '/holders?p=' + sym

    # Every selector below hangs off the same container, so the common
    # prefix is spelled once; the resulting XPaths are unchanged.
    base = ('//*[@id="main-0-Quote-Proxy"]/section/div[2]/section/div/'
            'section')

    try:
        # Scrape data from key statistics page
        driver.get(key)
        cash_sel = base + '/div[2]/div[1]/div[2]/div[5]/table/tbody/tr[1]/td[2]'
        debt_sel = base + '/div[2]/div[1]/div[2]/div[5]/table/tbody/tr[3]/td[2]'
        ebitda_sel = base + '/div[2]/div[1]/div[2]/div[4]/table/tbody/tr[5]/td[2]'
        beta_sel = base + '/div[2]/div[2]/div/div[1]/table/tbody/tr[1]/td[2]'
        cash = mb(_cell_text(driver, cash_sel))
        debt = mb(_cell_text(driver, debt_sel))
        ebitda = mb(_cell_text(driver, ebitda_sel))
        beta = bet(_cell_text(driver, beta_sel))

        # Scrape data from holders page
        driver.get(hold)
        inside_sel = base + '/div[3]/div[2]/div[1]/table/tbody/tr[1]/td[1]'
        inst_sel = base + '/div[3]/div[2]/div[1]/table/tbody/tr[2]/td[1]'
        inside = pct(_cell_text(driver, inside_sel))
        inst = pct(_cell_text(driver, inst_sel))
    finally:
        # Without this every call leaves a Chrome/chromedriver process
        # running, which piles up quickly when looping over many tickers.
        driver.quit()

    return (cash, debt, ebitda, beta, inside, inst)


for t in tqdm(ticks):
    try:
        yaht = yah(t)
    except Exception:
        # Best-effort scrape: skip tickers whose pages fail to load or
        # parse. Catching Exception (not a bare except) keeps Ctrl-C and
        # SystemExit able to stop a long run.
        continue
    # Record results only after a fully successful scrape so that all the
    # parallel lists stay the same length.
    cash.append(yaht[0])
    debt.append(yaht[1])
    ebitda.append(yaht[2])
    beta.append(yaht[3])
    inside.append(yaht[4])
    inst.append(yaht[5])
    ticky.append(t)