由于所爬取的网站需要验证码,通过浏览器的开发人员工具【F12】及在线 HTTP POST/GET 接口测试工具(http://coolaf.com/)发现:访问时只要加上请求头(header)信息,就可以跳过验证码校验。
而且该网站只接受 POST 请求,提交的参数也必须是 JSON 格式,否则请求失败。
现将通过 POST 方式提交 JSON 参数的方法记录如下:
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
|
import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.URI;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.List;
import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.client.utils.URIBuilder;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.client.HttpClients;
import org.apache.http.util.EntityUtils;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
/**
* <p>@PostJsonParamsTest.java</p>
* @version 1.0
* @author zxk
* @Date 2018-3-3
*/
public class PostJsonParamsTest {
// 超时时间
private static final int RUN_TIME = 10000 ;
// 爬取初始页数
private String page;
public static void main(String[] args) throws Exception {
PostJsonParamsTest crawl = new PostJsonParamsTest();
// 请求的url地址
String url = "http://www.gzcredit.gov.cn/Service/CreditService.asmx/searchOrgWithPage" ;
// 设置起始访问页码
crawl.setPage( "1" );
String isStop = "" ;
// 设置请求
HttpRequestBase request = null ;
request = new HttpPost(url);
try {
// 设置config
RequestConfig requestConfig = RequestConfig.custom()
.setSocketTimeout(RUN_TIME)
.setConnectTimeout(RUN_TIME)
.setConnectionRequestTimeout(RUN_TIME)
.build();
request.setConfig(requestConfig);
// json 格式的 post 参数
String postParams = "{\\"condition\\":{\\"qymc\\":\\"%%%%\\",\\"cydw\\":\\"\\"},\\"pageNo\\":" +crawl.getPage()+ ",\\"pageSize\\":100,count:2709846}" ;
System.out.println(postParams);
HttpEntity httpEntity = new StringEntity(postParams);
((HttpPost) request).setEntity(httpEntity);
// 添加请求头,可以绕过验证码
request.addHeader( "Accept" , "application/json, text/javascript, */*" );
request.addHeader( "Accept-Encoding" , "gzip, deflate" );
request.addHeader( "Accept-Language" , "zh-CN,zh;q=0.8" );
request.addHeader( "Connection" , "keep-alive" );
request.addHeader( "Host" , "www.gzcredit.gov.cn" );
request.addHeader( "Content-Type" , "application/json; charset=UTF-8" );
URIBuilder builder = new URIBuilder(url);
URI uri = builder.build();
uri = new URI(URLDecoder.decode(uri.toString(), "UTF-8" ));
request.setURI(uri);
while (!isStop.equals( "停止" )||isStop.equals( "重跑" )){
isStop = crawl.crawlList(request);
if (isStop.equals( "爬取" )){
crawl.setPage(String.valueOf(Integer.parseInt(crawl.getPage())+ 1 ));
}
// if("2713".equals(crawl.getPage())) break;
if ( "2" .equals(crawl.getPage())){
break ;
}
}
} catch (NumberFormatException e) {
e.printStackTrace();
throw new NumberFormatException( "数字格式错误" );
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
throw new UnsupportedEncodingException( "不支持的编码集" );
}
}
/**
* 爬取搜索列表
* @param page
* @return
*/
private String crawlList(HttpRequestBase request){
int statusCode = 0 ;
// 下面两种方式都可以用来创建客户端连接,相当于打开了一个浏览器
CloseableHttpClient httpClient = HttpClients.createDefault();
// HttpClient httpClient = HttpClientBuilder.create().build();
HttpEntity httpEntity = null ;
HttpResponse response = null ;
try {
try {
response = httpClient.execute(request);
} catch (Exception e){
e.printStackTrace();
EntityUtils.consumeQuietly(httpEntity);
return "重跑" ;
}
//打印状态
statusCode =response.getStatusLine().getStatusCode();
if (statusCode!= 200 ){
EntityUtils.consumeQuietly(httpEntity);
return "重跑" ;
}
//实体
httpEntity = response.getEntity();
String searchListStr = EntityUtils.toString(httpEntity, "GBK" ).replaceAll( "\\\\\\\\米" , "米" );
String allData = (String) JSONObject.parseObject(searchListStr).get( "d" );
// 字符串值中间含双引号的替换处理
String s = allData.replaceAll( "\\\\{\\"" , "{'" )
.replaceAll( "\\":\\"" , "':'" )
.replaceAll( "\\",\\"" , "','" )
.replaceAll( "\\":" , "':" )
.replaceAll( ",\\"" , ",'" )
.replaceAll( "\\"\\\\}" , "'}" )
.replaceAll( "\\"" , "" )
.replaceAll( "'" , "\\"" )
.replaceAll( "<br />" , "" )
.replaceAll( "\\t" , "" )
.replaceAll( "\\\\\\\\" , "?" );
JSONObject jsonData = JSONObject.parseObject(s);
JSONArray jsonContent = jsonData.getJSONArray( "orgList" );
searchListStr = null ;
allData = null ;
s = null ;
if (jsonContent== null || jsonContent.size()< 1 ) {
return "重跑" ;
}
System.out.println(jsonContent.toJSONString());
return "爬取" ;
} catch (Exception e) {
e.printStackTrace();
return "重跑" ;
} finally {
EntityUtils.consumeQuietly(httpEntity);
}
}
private String getPage() {
return page;
}
private void setPage(String page) {
this .page = page;
}
}
|
补充知识:Java 利用 HttpClient 发送 POST 请求,将请求数据放到 body 里
我就废话不多说了,大家还是直接看代码吧~
?
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
|
/**
 * Sends an HTTP POST request whose payload is carried in the request body and returns the
 * response content as a string.
 *
 * <p>The response is always released before the client is closed, and a failure while
 * cleaning up the response neither prevents the client from being closed nor replaces an
 * exception thrown by the request itself.</p>
 *
 * <p>NOTE(review): {@code getHttpPost}, {@code getHttpClient}, {@code getResult} and
 * {@code Encoding} are defined outside this snippet; presumably {@code Encoding} is the
 * charset name used for both the request body and the response - confirm.</p>
 *
 * @param url request address
 * @param bodyData text to send as the request body
 * @return the value produced by {@code getResult} for the response entity
 * @throws Exception if building or executing the request fails, or if closing the response
 *         or the client fails
 * @author wangyj
 * @date 2019年4月20日
 */
public static String doPostBodyData(String url, String bodyData) throws Exception {
    String result = "";
    CloseableHttpClient httpClient = null;
    CloseableHttpResponse response = null;
    try {
        HttpPost httpPost = getHttpPost(url, null); // request address
        httpPost.setEntity(new StringEntity(bodyData, Encoding));
        httpClient = getHttpClient();
        // Execute the request and read the response.
        response = httpClient.execute(httpPost);
        HttpEntity entity = response.getEntity();
        result = getResult(entity, Encoding);
    } finally {
        try {
            // Release the response first: drain whatever is left of the entity, then close.
            if (null != response) {
                EntityUtils.consumeQuietly(response.getEntity());
                response.close();
            }
        } finally {
            // Close the client last, even if releasing the response failed.
            if (null != httpClient) {
                httpClient.close();
            }
        }
    }
    return result;
}
|
以上这篇java 实现通过 post 方式提交json参数操作就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持快网idc。
原文链接:https://blog.csdn.net/zhouxukun123/article/details/79441031
相关文章
猜你喜欢
- 64M VPS建站:怎样选择合适的域名和SSL证书? 2025-06-10
- 64M VPS建站:怎样优化以提高网站加载速度? 2025-06-10
- 64M VPS建站:是否适合初学者操作和管理? 2025-06-10
- ASP.NET自助建站系统中的用户注册和登录功能定制方法 2025-06-10
- ASP.NET自助建站系统的域名绑定与解析教程 2025-06-10
TA的动态
- 2025-07-10 怎样使用阿里云的安全工具进行服务器漏洞扫描和修复?
- 2025-07-10 怎样使用命令行工具优化Linux云服务器的Ping性能?
- 2025-07-10 怎样使用Xshell连接华为云服务器,实现高效远程管理?
- 2025-07-10 怎样利用云服务器D盘搭建稳定、高效的网站托管环境?
- 2025-07-10 怎样使用阿里云的安全组功能来增强服务器防火墙的安全性?
快网idc优惠网
QQ交流群
您的支持,是我们最大的动力!
热门文章
-
2025-05-27 68
-
2025-05-25 68
-
2025-06-04 22
-
2025-05-29 78
-
2025-05-27 83
热门评论