Java使用URL_API时出现:java.net.SocketException: Unexpected end of file from server
博主在使用Java自带的API实现网络爬虫的时候,在请求方式为“POST”的情况下,运行时出现异常:java.net.SocketException: Unexpected end of file from server,如下:
博主认为,可能是先使用“GET”方式请求之后,再尝试用“POST”方式去请求网站数据时,网站服务器没有返回完整的响应就关闭了连接,猜测这可能跟网站的代理有关。
博主爬取的网站为酷狗音乐的官网:https://www.kugou.com/(可能就是该网站的问题)
于是博主换了一个网站后,运行成功!!!
故告诉大家,假如出现相同错误,可以不使用“POST”方式去请求,直接改用“GET”方式,或者换个网站/地址。
最后分享博主的菜鸡爬虫demo:
package test;

import org.junit.Test;

import java.io.*;
import java.net.HttpURLConnection;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Demonstrates a minimal web crawler built only on the JDK's {@link HttpURLConnection} API.
 *
 * <p>Each test issues one HTTP request, reads the whole response body as text and prints it
 * to standard output.
 *
 * @Author 海龟
 * @Date 2020/10/3 21:30
 */
public class JDKAPITest {

    /** User-Agent header sent with every request so the request looks like a mobile browser. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Mobile Safari/537.36 Edg/85.0.564.68";

    /** Connect and read timeout, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 30000;

    /**
     * Fetches a page with an HTTP GET request and prints the response body.
     *
     * @throws Exception if the connection cannot be opened or the response cannot be read
     */
    @Test
    public void testGet() throws Exception {
        // 1. The URL to crawl; request parameters travel in the query string.
        URL url = new URL("https://www.kugou.com/?username=xx");

        // 2. Obtain the connection object (no network traffic happens yet).
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            // 3. Request method (GET is the default; the name must be upper case) and headers.
            connection.setRequestMethod("GET");
            applyCommonSettings(connection);

            // 4. Read and print the response.
            System.out.println(readBody(connection));
        } finally {
            // 5. Always release the connection, even when the request failed.
            connection.disconnect();
        }
    }

    /**
     * Sends a form parameter with an HTTP POST request and prints the response body.
     *
     * @throws Exception if the connection cannot be opened, the body cannot be written,
     *                   or the response cannot be read
     */
    @Test
    public void testPost() throws Exception {
        // 1. The URL to crawl.
        URL url = new URL("http://www.itcast.cn/");

        // 2. Obtain the connection object (no network traffic happens yet).
        HttpURLConnection connection = (HttpURLConnection) url.openConnection();
        try {
            // 3. Request method, headers and request body.
            connection.setDoOutput(true); // required before a request body may be written
            connection.setRequestMethod("POST");
            applyCommonSettings(connection);

            // Closing the stream flushes the body; it must be complete before the response is read.
            try (OutputStream output = connection.getOutputStream()) {
                output.write("username=xx".getBytes(StandardCharsets.UTF_8));
            }

            // 4. Read and print the response.
            System.out.println(readBody(connection));
        } finally {
            // 5. Always release the connection, even when the request failed.
            connection.disconnect();
        }
    }

    /** Applies the User-Agent header and the timeouts shared by every request. */
    private static void applyCommonSettings(HttpURLConnection connection) {
        connection.setRequestProperty("User-Agent", USER_AGENT);
        connection.setConnectTimeout(TIMEOUT_MILLIS);
        // The default read timeout is 0 (wait forever); bound it so a stalled server cannot hang the test.
        connection.setReadTimeout(TIMEOUT_MILLIS);
    }

    /**
     * Reads the whole response body line by line, terminating every line with {@code '\n'}.
     *
     * @param connection a fully configured connection; the request is sent when its input
     *                   stream is opened
     * @return the response body decoded as UTF-8
     * @throws IOException if the response cannot be read
     */
    private static String readBody(HttpURLConnection connection) throws IOException {
        StringBuilder body = new StringBuilder();
        // try-with-resources closes the reader (and the wrapped stream) on every path.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                body.append(line).append('\n');
            }
        }
        return body.toString();
    }
}
感谢大家!!