当先锋百科网

首页 1 2 3 4 5 6 7

转载自:

http://chengyoyo2006.blog.163.com/blog/static/84517348200810542114322/


如何用 Java 实现抓取网页?

JAVA技术 2008-11-05 16:21:14 阅读103 评论0   字号:   订阅

import java.net.*;
import java.io.*;

/**
 * Minimal page-grabbing example: downloads one web page as text and appends
 * it to a local file.
 *
 * <p>The page address and the output location are fixed constants of this
 * class. Failures are reported with {@code printStackTrace()} and never
 * propagate to the caller.
 */
public class Catch1 {

  /** Address of the page that {@link #test()} downloads. */
  private static final String PAGE_URL = "http://www.sohu.com";

  /** Directory that receives the downloaded page. */
  private static final String OUTPUT_DIR = "E://text";

  /** Name of the file, inside {@link #OUTPUT_DIR}, that the page is appended to. */
  private static final String OUTPUT_FILE = "test.html";

  /**
   * Downloads {@link #PAGE_URL} and appends its text to
   * {@code OUTPUT_DIR/OUTPUT_FILE}.
   *
   * <p>The operation is best-effort: if the download fails, the stack trace
   * is printed and whatever text was read before the failure (possibly
   * nothing) is still written. A failure while writing is likewise only
   * printed.
   */
  public void test() {
    StringBuilder document = new StringBuilder();
    try {
      download(PAGE_URL, document);
    } catch (IOException e) {
      // MalformedURLException is an IOException, so one handler covers both.
      e.printStackTrace();
    }

    try {
      appendToFile(new File(OUTPUT_DIR, OUTPUT_FILE), document.toString());
    } catch (IOException e) {
      e.printStackTrace();
    }
  }

  /**
   * Reads the resource at {@code address} line by line into {@code sink},
   * terminating every line with a single {@code '\n'}.
   *
   * @param address URL of the resource to read
   * @param sink    buffer that receives the text; it keeps everything read so
   *                far even when this method throws
   * @throws IOException if the URL is malformed or the connection/read fails
   */
  private static void download(String address, StringBuilder sink) throws IOException {
    URLConnection conn = new URL(address).openConnection();
    // NOTE(review): bytes are decoded with the platform default charset; a page
    // served in a different encoding will be garbled -- confirm what is wanted.
    BufferedReader reader =
        new BufferedReader(new InputStreamReader(conn.getInputStream()));
    try {
      String line;
      while ((line = reader.readLine()) != null) {
        // readLine() strips the line terminator, so put a real newline back.
        sink.append(line).append('\n');
      }
    } finally {
      // Close on every path so the connection's stream is not leaked.
      reader.close();
    }
  }

  /**
   * Appends {@code content} to {@code target}, creating the parent directory
   * and the file itself when they do not exist yet.
   *
   * @param target  file to append to
   * @param content text to write
   * @throws IOException if the directory cannot be created or the write fails
   */
  private static void appendToFile(File target, String content) throws IOException {
    File dir = target.getParentFile();
    if (dir != null && !dir.isDirectory() && !dir.mkdirs()) {
      throw new IOException("Cannot create output directory: " + dir);
    }

    // Append mode (second argument true): repeated runs accumulate in one file.
    BufferedWriter writer = new BufferedWriter(new FileWriter(target.getPath(), true));
    try {
      writer.write(content);
      writer.flush();
    } finally {
      // Close on every path so the file handle is not leaked.
      writer.close();
    }
  }

  /**
   * Runs the example once.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    new Catch1().test();
  }
}