有一个网站,我想获取它的数据信息,但需要登录后才可以获取到。该网站登录时有图片验证码,我能否在自己这边远程请求它的图片验证码 URL,手动录入这个验证码,从而实现模拟登录并获取数据信息?我目前测试时好像总是提示验证码不正确,不清楚是怎么回事。
/*
 * Answer: yes, this can be done.
 *
 * 1. Request the captcha image URL, keep the cookie returned by that request
 *    (cookie1), download the image and recognise its text (str).
 * 2. Log in with cookie1 and str plus your user name, password, etc., and
 *    obtain cookie2.
 * 3. Send cookie2 with the requests that follow.
 *
 * When fetching the captcha it is not enough to download the image: the
 * parameters of that request (a token, in the case of Baidu's login) and the
 * cookie must be kept as well. After the captcha has been entered, post it
 * together with that earlier cookie and those parameters; this way the captcha
 * does not change between the two requests.
 *
 * Let the code speak. Language: Java. Third-party jar: httpclient-4.2.1
 */
package cn.gb40;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;

import org.apache.http.Consts;
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.entity.UrlEncodedFormEntity;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.cookie.Cookie;
import org.apache.http.entity.mime.HttpMultipartMode;
import org.apache.http.entity.mime.MultipartEntity;
import org.apache.http.entity.mime.content.FileBody;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.message.BasicNameValuePair;
import org.apache.http.params.CoreConnectionPNames;

import cn.uuwise.CQZDMDLL;

/**
 * Simulated login against a site protected by an image captcha, followed by a
 * few authenticated file operations (upload, edit description, delete).
 *
 * <p>All requests go through the single shared {@link #httpclient}, so the
 * cookies set by the captcha request are still in its cookie store when the
 * login request is sent.
 */
public class Login {

    public static DefaultHttpClient httpclient = new DefaultHttpClient();
    public static String login_url = "http://www.400gb.com/index.php";
    public static String img_path_url = "http://www.400gb.com/randcodeV2_login.php";
    public static String image_save_path = "c://vcode.png";
    public static String update_url = "http://upload.zhuanmi.net/web/upload.do";
    public static String userid = "1883879";
    public static String modify_file_url = "http://newhome.400gb.com/iajax.php?item=file_act";
    public static String delete_file_url = "http://newhome.400gb.com/iajax.php?item=file_act&action=file_delete&task=file_delete&file_id=";

    /** Connection and socket timeout applied to the HttpClient requests, in milliseconds. */
    private static final int TIMEOUT_MS = 20000;

    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/27.0.1453.110 Safari/537.36 CoolNovo/2.0.9.19";

    private static final String UPLOAD_USER_AGENT =
            "Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.17 (KHTML, like Gecko) Chrome/24.0.1312.57 Safari/537.17";

    private static final String FILES_REFERER = "http://newhome.400gb.com/?item=files&action=index";

    /**
     * Runs the flow: fetch the captcha (cookie1), recognise it, log in (cookie2).
     * Stops early when the captcha could not be fetched or recognised, because
     * a login attempt without them cannot succeed.
     */
    public static void main(String[] args) throws Exception {
        String cookie = imgCookie();
        System.out.println("img cookie " + cookie);
        if (cookie == null) {
            System.err.println("captcha request failed, login aborted");
            return;
        }

        String code = identifyImg();
        if (code == null) {
            System.err.println("captcha could not be recognised, login aborted");
            return;
        }

        cookie = loginCookie(code, cookie);
        System.out.println("login cookie " + cookie);
    }

    /**
     * Requests the captcha image, saves it to {@link #image_save_path} and
     * returns the cookies the client holds afterwards.
     *
     * @return the cookies formatted as {@code name=value; } pairs, or
     *         {@code null} when the request failed
     */
    public static String imgCookie() {
        HttpGet httpGet = null;
        try {
            httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, TIMEOUT_MS);
            httpGet = new HttpGet(img_path_url);
            httpGet.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, TIMEOUT_MS);
            HttpResponse response = httpclient.execute(httpGet);

            // Save the image; the cookie is still returned when saving fails,
            // but the failure is reported instead of being silently ignored.
            if (response.getEntity() == null
                    || !download(response.getEntity().getContent(), image_save_path)) {
                System.err.println("captcha image could not be saved to " + image_save_path);
            }
            return cookieHeader("第一次cookie");
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (httpGet != null) {
                httpGet.releaseConnection();
            }
        }
    }

    /**
     * Recognises the captcha stored at {@link #image_save_path}.
     *
     * @return the recognised text with surrounding whitespace removed, or
     *         {@code null} when the result is not a run of at least four digits
     */
    public static String identifyImg() {
        String str = CQZDMDLL.result(image_save_path);
        if (str == null) {
            return null;
        }
        // Return the trimmed value: it is the trimmed text that was validated,
        // and stray whitespace would make the submitted captcha wrong.
        String trimmed = str.trim();
        return trimmed.matches("\\d{4,}") ? trimmed : null;
    }

    /**
     * Posts the login form with the captcha text and the cookie obtained while
     * fetching the captcha.
     *
     * @param code   recognised captcha text
     * @param cookie cookie header value returned by {@link #imgCookie()}
     * @return the cookies held after the login request formatted as
     *         {@code name=value; } pairs, or {@code null} when the request failed
     */
    public static String loginCookie(String code, String cookie) {
        HttpPost httppost = null;
        try {
            httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, TIMEOUT_MS);

            // Step 2: user login.
            httppost = new HttpPost(login_url);
            httppost.setHeader("Cookie", cookie);

            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("item", "account"));
            nvps.add(new BasicNameValuePair("action", "login"));
            nvps.add(new BasicNameValuePair("task", "login"));
            nvps.add(new BasicNameValuePair("ref", ""));
            // NOTE(review): formhash is hard-coded; presumably it is issued by the
            // site's login form and may have to be read per session - confirm.
            nvps.add(new BasicNameValuePair("formhash", "fba6075a"));
            nvps.add(new BasicNameValuePair("randcodeV2", code));
            nvps.add(new BasicNameValuePair("username", "----"));
            nvps.add(new BasicNameValuePair("password", "-------"));

            httppost.setHeader("Host", "www.400gb.com");
            httppost.setHeader("Origin", "http://www.400gb.com");
            httppost.setHeader("Referer", "http://www.400gb.com/index.php");
            httppost.setHeader("User-Agent", USER_AGENT);
            httppost.setEntity(new UrlEncodedFormEntity(nvps, Consts.UTF_8));
            httppost.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, TIMEOUT_MS);

            HttpResponse response = httpclient.execute(httppost);
            System.out.println("Login form get: " + response.getStatusLine().getStatusCode());

            // These are the cookies after login (cookie2), so they are labelled as
            // the second set rather than repeating the captcha step's label.
            return cookieHeader("第二次cookie");
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (httppost != null) {
                httppost.releaseConnection();
            }
        }
    }

    /**
     * Fetches the site's index page with the given cookie and prints it.
     * (The original note on this step reads "obtain _tb_token_".)
     *
     * @param cookie cookie header value to send
     * @return the page text, each line trimmed and terminated by {@code \n},
     *         or {@code null} when the request failed
     */
    public static String rend(String cookie) {
        HttpGet httpGet = null;
        try {
            httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, TIMEOUT_MS);
            httpGet = new HttpGet("http://www.400gb.com/index.php");
            httpGet.setHeader("Cookie", cookie);
            httpGet.setHeader("Host", "www.400gb.com");
            httpGet.setHeader("Referer", "http://www.400gb.com/index.php");
            httpGet.setHeader("User-Agent", USER_AGENT);
            httpGet.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, TIMEOUT_MS);

            String html = readBody(httpclient.execute(httpGet));
            System.out.println(html);
            return html;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        } finally {
            if (httpGet != null) {
                httpGet.releaseConnection();
            }
        }
    }

    /**
     * Uploads a file as the multipart field {@code fileData}.
     *
     * @param cookie cookie header value of the logged-in session
     * @param path   path of the local file to upload
     * @return the response body parsed as a number when it consists of digits
     *         only; {@code 0} for any other body or when the request failed
     * @throws Exception declared for compatibility with existing callers
     */
    public static long upload(String cookie, String path) throws Exception {
        String newurl = update_url + "?userid=" + userid
                + "&folderid=0&key=key.....................&maxsize=2147483648";
        HttpPost post = new HttpPost(newurl);
        post.setHeader("Cookie", cookie);
        post.setHeader("Referer", FILES_REFERER);
        post.setHeader("User-Agent", UPLOAD_USER_AGENT);

        // Fill in the multipart form; the entity supplies its own Content-Type
        // header including the boundary.
        MultipartEntity reqEntity = new MultipartEntity(HttpMultipartMode.BROWSER_COMPATIBLE);
        reqEntity.addPart("fileData", new FileBody(new File(path), "text/plain"));
        post.setEntity(reqEntity);

        try {
            HttpResponse response = httpclient.execute(post);
            System.out.println("upload code " + response.getStatusLine().getStatusCode());

            // A response recorded during the original test looked like:
            // {"filename":"head","updatetime":"2013-06-10 13:00","filetype":"jpg","istype":1,"username":"safetys","filesize":"10.47 KB","uid":"841690","parentid":null,"ip":"125.39.35.21","sid":8859860,"filethumb":"jpg"}
            // NOTE(review): such a body is not all digits, so this method returns 0
            // for it - confirm which response format carries the file id.
            String body = readBody(response).trim();
            if (!body.matches("\\d+")) {
                return 0L;
            }
            return Long.parseLong(body);
        } catch (NumberFormatException e) {
            // Digit run too long for a long.
            System.err.println(e);
            return 0L;
        } catch (IOException e) {
            System.err.println(e);
            return 0L;
        } finally {
            // Released on every path; the original skipped this on normal returns.
            post.releaseConnection();
        }
    }

    /**
     * Posts a {@code file_modify} request that sets a file's description and
     * prints the server's reply.
     *
     * @param cookie cookie header value of the logged-in session
     * @param fileid id of the file to modify
     * @param desc   new description text
     */
    public static void addDesc(String cookie, long fileid, String desc) {
        HttpPost httppost = null;
        try {
            httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, TIMEOUT_MS);
            httppost = new HttpPost(modify_file_url);
            httppost.setHeader("Cookie", cookie);

            List<NameValuePair> nvps = new ArrayList<NameValuePair>();
            nvps.add(new BasicNameValuePair("action", "file_modify"));
            nvps.add(new BasicNameValuePair("task", "file_modify"));
            nvps.add(new BasicNameValuePair("formhash", "a410f69a"));
            nvps.add(new BasicNameValuePair("file_id", String.valueOf(fileid)));
            nvps.add(new BasicNameValuePair("file_name", "vcro3de.png"));
            nvps.add(new BasicNameValuePair("file_description", desc));

            httppost.setHeader("Host", "newhome.400gb.com");
            httppost.setHeader("Origin", "http://newhome.400gb.com");
            httppost.setHeader("Referer", FILES_REFERER);
            httppost.setHeader("User-Agent", USER_AGENT);
            httppost.setEntity(new UrlEncodedFormEntity(nvps, Consts.UTF_8));
            httppost.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, TIMEOUT_MS);

            HttpResponse response = httpclient.execute(httppost);
            System.out.println("add description code: " + response.getStatusLine().getStatusCode());

            // A reply of OK means the change succeeded; otherwise the file name
            // may be a duplicate.
            System.out.println("update desc html " + readBody(response));
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (httppost != null) {
                httppost.releaseConnection();
            }
        }
    }

    /**
     * Sends the delete request for one file and prints the response status.
     *
     * @param cookie cookie header value of the logged-in session
     * @param fileid id of the file to delete
     */
    public static void deleteFile(String cookie, long fileid) {
        HttpGet httpGet = null;
        try {
            httpclient.getParams().setParameter(CoreConnectionPNames.CONNECTION_TIMEOUT, TIMEOUT_MS);
            httpGet = new HttpGet(delete_file_url + fileid);
            httpGet.setHeader("Cookie", cookie);
            httpGet.setHeader("Host", "newhome.400gb.com");
            httpGet.setHeader("Referer", FILES_REFERER);
            httpGet.setHeader("User-Agent", USER_AGENT);
            httpGet.getParams().setParameter(CoreConnectionPNames.SO_TIMEOUT, TIMEOUT_MS);

            HttpResponse response = httpclient.execute(httpGet);
            System.out.println("delete file code " + response.getStatusLine().getStatusCode());
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            if (httpGet != null) {
                httpGet.releaseConnection();
            }
        }
    }

    /**
     * Writes a stream to a file. The stream is always closed.
     *
     * @param in   data to save; {@code null} yields {@code false}
     * @param path target file path; {@code null} yields {@code false}
     * @return {@code true} when at least one byte was written, {@code false}
     *         for empty input or on an I/O error
     */
    public static boolean download(InputStream in, String path) {
        if (in == null) {
            return false;
        }
        if (path == null) {
            closeAndLog(in);
            return false;
        }
        FileOutputStream out = null;
        try {
            out = new FileOutputStream(path);
            return copy(in, out) > 0;
        } catch (IOException e) {
            reportDownloadError(e);
            return false;
        } finally {
            closeAndLog(out);
            closeAndLog(in);
        }
    }

    /**
     * Downloads a URL to a file.
     *
     * @param urlstr URL to fetch; {@code null} or blank yields {@code false}
     * @param path   target file path
     * @return {@code true} when at least one byte was written, {@code false}
     *         otherwise
     */
    public static boolean download(String urlstr, String path) {
        if (urlstr == null || "".equals(urlstr.trim())) {
            return false;
        }
        InputStream in;
        try {
            System.out.println("download url " + urlstr);
            URLConnection connection = new URL(urlstr).openConnection();
            connection.setConnectTimeout(5000);
            // No read timeout is set (the original left setReadTimeout disabled),
            // so a server that stops sending can block this call.
            // NOTE(review): setDoOutput(true) is kept from the original although
            // nothing is written to the connection - confirm it is needed.
            connection.setDoOutput(true);
            in = connection.getInputStream();
        } catch (IOException e) {
            reportDownloadError(e);
            return false;
        }
        // The target file is opened only after the connection succeeded, so a
        // failed request no longer truncates an existing file.
        return download(in, path);
    }

    /**
     * Fetches a URL into memory.
     *
     * @param URLName URL to fetch; must yield an {@link HttpURLConnection}
     * @return a stream over the response body, or an empty stream when the
     *         response status is not 200
     * @throws Exception when the URL is malformed, is not an HTTP(S) URL, or
     *         an I/O error occurs
     */
    public static InputStream getUrlImg(String URLName) throws Exception {
        ByteArrayOutputStream os = new ByteArrayOutputStream();
        URL url = new URL(URLName);
        HttpURLConnection httpconn = (HttpURLConnection) url.openConnection();

        // getResponseCode() opens the connection if it is not open yet.
        int httpResult = httpconn.getResponseCode();
        System.out.println(httpResult);
        if (httpResult != HttpURLConnection.HTTP_OK) {
            // Anything other than HTTP_OK is treated as a failed connection.
            System.out.print("连接失败!");
        } else {
            System.out.println(httpconn.getContentLength());
            InputStream body = new BufferedInputStream(httpconn.getInputStream());
            try {
                copy(body, os);
            } finally {
                closeAndLog(body);
            }
        }
        return new ByteArrayInputStream(os.toByteArray());
    }

    /** Prints the label, then returns the client's cookies as {@code name=value; } pairs. */
    private static String cookieHeader(String label) {
        List<Cookie> cookies = httpclient.getCookieStore().getCookies();
        System.out.println(label);
        if (cookies.isEmpty()) {
            System.out.println("None");
        }
        StringBuilder header = new StringBuilder();
        for (Cookie cookie : cookies) {
            header.append(cookie.getName()).append("=").append(cookie.getValue()).append("; ");
        }
        return header.toString();
    }

    /** Reads a response body as UTF-8, trimming each line and ending it with a newline. */
    private static String readBody(HttpResponse response) throws IOException {
        if (response.getEntity() == null) {
            return "";
        }
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(response.getEntity().getContent(), "utf-8"));
        try {
            StringBuilder body = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line.trim()).append("\n");
            }
            return body.toString();
        } finally {
            closeAndLog(reader);
        }
    }

    /** Copies everything from in to out, flushes out, and returns the byte count. */
    private static long copy(InputStream in, OutputStream out) throws IOException {
        byte[] chunk = new byte[1024];
        long total = 0;
        int read;
        while ((read = in.read(chunk)) != -1) {
            out.write(chunk, 0, read);
            total += read;
        }
        out.flush();
        return total;
    }

    /** Reports a download failure: a short message for the two expected cases, a stack trace otherwise. */
    private static void reportDownloadError(IOException e) {
        if (e instanceof FileNotFoundException) {
            System.err.println("download FileNotFoundException");
        } else if (e instanceof SocketTimeoutException) {
            System.err.println("download SocketTimeoutException");
        } else {
            e.printStackTrace();
        }
    }

    /** Closes the resource if it is non-null, printing (not propagating) any failure. */
    private static void closeAndLog(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
可以:
1、访问图片的URL,获取 第一次的cookie1,下载图片,识别图片 str。
2、使用上一步的 cookie1 和 str,加上你的用户名和密码等参数登录,获取 cookie2。
3、之后的访问都带上 cookie2。
在获取验证码时,不但要把验证码图片下载下来,还需要保存当时的参数(百度登录里面是 token)和 cookie;然后在输入验证码后,把验证码连同上一次的 cookie 和参数一起加到 POST 参数里面,就 OK 了,这样验证码就不会变。
我用代码说话吧:
语言:Java
第三方jar:httpclient-4.2.1