java获取网页源代码

时间:2022-04-22
本文介绍如何使用 Java 获取网页源代码,内容包括使用实例、应用技巧、基本知识点总结和注意事项,具有一定的参考价值,需要的朋友可以参考。
 	public static String getHtmlContent(URL url, String encode) {
    StringBuffer contentBuffer = new StringBuffer(); 
 	   int responseCode = -1;
    HttpURLConnection con = null;
    try {
     con = (HttpURLConnection) url.openConnection();
     con.setRequestProperty("User-Agent", "Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");// IE代理进行下载
     con.setConnectTimeout(60000);
     con.setReadTimeout(60000);
     // 获得网页返回信息码
     responseCode = con.getResponseCode();
     if (responseCode == -1) {
      String re = url.toString() + " : connection is failure...";
      con.disconnect();
      return re;
     }
     if (responseCode >= 400) // 请求失败
     {
      String re = "请求失败:get response code: " + responseCode;
      con.disconnect();
      return re;
     } 
 	    InputStream inStr = con.getInputStream();
     InputStreamReader istreamReader = new InputStreamReader(inStr, encode);
     BufferedReader buffStr = new BufferedReader(istreamReader); 
 	    String str = null;
     while ((str = buffStr.readLine()) != null)
      contentBuffer.append(str);
     inStr.close();
    } catch (IOException e) {
     e.printStackTrace();
     contentBuffer = null;
     System.out.println("error: " + url.toString());
    } finally {
     con.disconnect();
    }
    return contentBuffer.toString();
   } 
 	  public static String getHtmlContent(String url, String encode) {
    if (!url.toLowerCase().startsWith("http://")) {
     url = "http://" + url;
    }
    try {
     URL rUrl = new URL(url);
     return getHtmlContent(rUrl, encode);
    } catch (Exception e) {
     e.printStackTrace();
     return "网址错误!";
    }
   }
   // Test: example usage (uncomment to run)
   /*public static void main(String[] args){
    System.out.println(getHtmlContent("www.ilkhome.cn","utf-8")) ;
 
   }*/