本文共 3215 字，大约阅读时间需要 10 分钟。
#!/usr/bin/python
# encoding=utf-8
"""Render a JD.com product page with PhantomJS and save the HTML to disk.

The rendered page source is printed to stdout and written, UTF-8 encoded,
to ``aaaa1.txt`` in the current working directory.
"""
import sys

from selenium import webdriver

if sys.version_info[0] == 2:
    # Python 2 only: make implicit unicode -> str conversions (e.g. when
    # stdout is piped) use UTF-8 instead of ASCII. ``reload`` restores
    # ``sys.setdefaultencoding``, which site.py removes at startup.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

driver = webdriver.PhantomJS(
    executable_path='/home/lhy/phantomjs-1.9.8-linux-x86_64/bin/phantomjs')
try:
    driver.get("http://item.jd.com/2914823.html")
    page_source = driver.page_source
finally:
    # Always shut the PhantomJS process down, even if loading the page fails.
    driver.quit()

print(page_source)

# The file is opened in binary mode, so encode explicitly rather than
# relying on the interpreter's default encoding.
with open("aaaa1.txt", "wb") as fo:
    fo.write(page_source.encode("utf-8"))
# coding=utf-8
"""Helpers for fetching pages with requests or PhantomJS, plus a demo run.

Running the module fetches ``http://localhost:8080/pro/html.html`` through
PhantomJS, writes the rendered HTML to ``taobao2.txt`` and prints it.
"""
import requests
import re
import time
from pyquery import PyQuery as pq
from lxml import etree
from bs4 import BeautifulSoup
import sys
from selenium import webdriver
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

if sys.version_info[0] == 2:
    # Python 2 only: make implicit unicode -> str conversions use UTF-8.
    reload(sys)  # noqa: F821 - builtin on Python 2
    sys.setdefaultencoding("utf-8")

# URLs collected by getUrlsFromFile().
urls = []


def getHtml2(url):
    """Fetch *url* with ``requests`` and return the raw response body.

    The request is sent as a POST with a legacy IE User-Agent header. The
    body is also saved to ``phonesinfo1.txt`` in the current directory.
    """
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; windows NT)'
    headers = {'User-Agent': user_agent}
    # NOTE(review): a POST is used to retrieve the page; confirm that GET
    # was not intended.
    r = requests.post(url, headers=headers)
    with open("phonesinfo1.txt", "wb") as fo:
        fo.write(r.content)
    return r.content


def getHtml(url):
    """Load *url* in PhantomJS and return the rendered page source.

    The browser identifies itself with a legacy IE User-Agent and scrolls
    the document body to 1000px before the source is read. The driver is
    always shut down, even when loading the page raises.
    """
    dcap = dict(DesiredCapabilities.PHANTOMJS)
    dcap["phantomjs.page.settings.userAgent"] = (
        "Mozilla/4.0 (compatible; MSIE 5.5; windows NT)"
    )
    # Alternatives: pass executable_path=... to PhantomJS, or use
    # webdriver.Chrome() instead.
    driver = webdriver.PhantomJS(desired_capabilities=dcap)
    try:
        driver.get(url)
        # Scroll the page down by 1000px.
        driver.execute_script("document.body.scrollTop=1000")
        # NOTE(review): per the Selenium docs implicitly_wait only sets the
        # timeout for later element lookups; it does not pause here. Kept
        # for parity with the original; use an explicit wait if lazy-loaded
        # content must be present.
        driver.implicitly_wait(30)
        return driver.page_source
    finally:
        driver.quit()


def getPqHtml(html):
    """Wrap *html* in a PyQuery object and return it."""
    return pq(html)


def getUrlsFromFile(fileUrl='phoneurl.txt'):
    """Append every line of the file at *fileUrl* to the global ``urls``.

    Each line is stripped of surrounding whitespace and printed as it is
    read. *fileUrl* defaults to ``phoneurl.txt``, the path the original
    code always used regardless of the argument.
    """
    with open(fileUrl, 'r') as f:
        for line in f:
            url_one = line.strip()
            print(url_one)
            urls.append(url_one)


url = "http://localhost:8080/pro/html.html"
text = getHtml(url)
# Binary mode: encode explicitly instead of relying on the default encoding.
with open("taobao2.txt", "wb") as fo:
    fo.write(text.encode("utf-8"))
print(text)
html 页面
# coding=utf-8
"""Log in to the local demo site with Firefox, then open a protected page.

The script fills in the username, password and the captcha text shown on
the page, submits the form, prints the resulting URL, and finally prints
the source of a page that requires authentication.
"""
from selenium import webdriver

browser = webdriver.Firefox()
browser.get("http://localhost:8080/pro")

# Clear whatever is already in the password box before typing.
browser.find_element_by_name("password").clear()
browser.find_element_by_name("username").send_keys("test")
browser.find_element_by_name("password").send_keys("123")

# Read the captcha value from the <span> inside the captcha element and
# remove the spaces between its characters.
yzm = browser.find_element_by_class_name("yzm-img").find_element_by_tag_name("span").text
yzm = yzm.replace(' ', '')
browser.find_element_by_class_name("yzm-sr").send_keys(yzm)

# Click the button to submit the form.
browser.find_element_by_id("tijiao").click()
print(browser.current_url)

# After a successful login the cookies are kept in the browser object, so
# pages that require authentication can be requested directly.
browser.get("http://localhost:8080/pro/test.jsp")
print(browser.page_source)
# browser.quit()  # commented out in the original; enable to close Firefox when done
# coding=utf-8
"""Search Baidu for "selenium" with Firefox and print the resulting URL."""
from selenium import webdriver

browser = webdriver.Firefox()
browser.get("http://www.baidu.com")

# "kw" is the id of the search input, "su" the id of the search button.
search_box = browser.find_element_by_id("kw")
search_box.clear()
search_box.send_keys("selenium")
browser.find_element_by_id("su").click()

# URL of the page the browser is on after the click.
print(browser.current_url)
# browser.quit()  # commented out in the original; enable to close Firefox when done
转载地址:http://xwsga.baihongyu.com/