技术开发 频道

用Java编程实现“网络蜘蛛”

例1:查找死链接(CheckLinks.java)
import java.awt.*; import javax.swing.*; import java.net.*; import java.io.*; public class CheckLinks extends javax.swing.JFrame implements Runnable,ISpiderReportable { public CheckLinks() { //{{INIT_CONTROLS setTitle("找到死链接"); getContentPane().setLayout(null); setSize(405,288); setVisible(false); label1.setText("输入一个URL:"); getContentPane().add(label1); label1.setBounds(12,12,84,12); begin.setText("Begin"); begin.setActionCommand("Begin"); getContentPane().add(begin); begin.setBounds(12,36,84,24); getContentPane().add(url); url.setBounds(108,36,288,24); errorScroll.setAutoscrolls(true); errorScroll.setHorizontalScrollBarPolicy(javax.swing. ScrollPaneConstants.HORIZONTAL_SCROLLBAR_ALWAYS); errorScroll.setVerticalScrollBarPolicy(javax.swing. ScrollPaneConstants.VERTICAL_SCROLLBAR_ALWAYS); errorScroll.setOpaque(true); getContentPane().add(errorScroll); errorScroll.setBounds(12,120,384,156); errors.setEditable(false); errorScroll.getViewport().add(errors); errors.setBounds(0,0,366,138); current.setText("当前处理进度:"); getContentPane().add(current); current.setBounds(12,72,384,12); goodLinksLabel.setText("正常链接:0"); getContentPane().add(goodLinksLabel); goodLinksLabel.setBounds(12,96,192,12); badLinksLabel.setText("死链接:0"); getContentPane().add(badLinksLabel); badLinksLabel.setBounds(216,96,96,12); //}} //{{INIT_MENUS //}} //{{REGISTER_LISTENERS SymAction lSymAction = new SymAction(); begin.addActionListener(lSymAction); //}} } /** *参数args未使用 */ static public void main(String args[]) { (new CheckLinks()).setVisible(true); } /** *添加通知 */ public void addNotify() { //记录窗口尺寸并调用父类的addNotify. 
Dimension size = getSize(); super.addNotify(); if ( frameSizeAdjusted ) return; frameSizeAdjusted = true; //根据菜单栏等调整Frame尺寸 Insets insets = getInsets(); javax.swing.JMenuBar menuBar = getRootPane().getJMenuBar(); int menuBarHeight = 0; if ( menuBar != null ) menuBarHeight = menuBar.getPreferredSize().height; setSize(insets.left + insets.right + size.width, insets.top + insets.bottom + size.height + menuBarHeight); } boolean frameSizeAdjusted = false; //{{DECLARE_CONTROLS javax.swing.JLabel label1 = new javax.swing.JLabel(); javax.swing.JButton begin = new javax.swing.JButton(); javax.swing.JTextField url = new javax.swing.JTextField(); javax.swing.JScrollPane errorScroll = new javax.swing.JScrollPane(); /** *存储错误信息 */ javax.swing.JTextArea errors = new javax.swing.JTextArea(); javax.swing.JLabel current = new javax.swing.JLabel(); javax.swing.JLabel goodLinksLabel = new javax.swing.JLabel(); javax.swing.JLabel badLinksLabel = new javax.swing.JLabel(); //}} //{{DECLARE_MENUS //}} /** *后台蜘蛛线程 */ protected Thread backgroundThread; protected Spider spider; protected URL base; protected int badLinksCount = 0; protected int goodLinksCount = 0; /** *用于分发事件的内部类 */ class SymAction implements java.awt.event.ActionListener { public void actionPerformed(java.awt.event.ActionEvent event) { Object object = event.getSource(); if ( object == begin ) begin_actionPerformed(event); } } /** *当begin或cancel按钮被点击时调用 * *参数event与按钮相连 */ void begin_actionPerformed(java.awt.event.ActionEvent event) { if ( backgroundThread==null ) { begin.setLabel("Cancel"); backgroundThread = new Thread(this); backgroundThread.start(); goodLinksCount=0; badLinksCount=0; } else { spider.cancel(); } } /** *执行后台线程操作 */ public void run() { try { errors.setText(""); spider = new Spider(this); spider.clear(); base = new URL(url.getText()); spider.addURL(base); spider.begin(); Runnable doLater = new Runnable() { public void run() { begin.setText("Begin"); } }; SwingUtilities.invokeLater(doLater); 
backgroundThread=null; } catch ( MalformedURLException e ) { UpdateErrors err = new UpdateErrors(); err.msg = "错误地址。"; SwingUtilities.invokeLater(err); } } /** *当找到某一URL时由蜘蛛调用,在此验证链接。 * *参数base是找到链接时的页面 *参数url是链接地址 */ public boolean spiderFoundURL(URL base,URL url) { UpdateCurrentStats cs = new UpdateCurrentStats(); cs.msg = url.toString(); SwingUtilities.invokeLater(cs); if ( !checkLink(url) ) { UpdateErrors err = new UpdateErrors(); err.msg = url+"(on page " + base + ")\n"; SwingUtilities.invokeLater(err); badLinksCount++; return false; } goodLinksCount++; if ( !url.getHost().equalsIgnoreCase(base.getHost()) ) return false; else return true; } /** *当发现URL错误时调用 * *参数url是导致错误的URL */ public void spiderURLError(URL url) { } /** *由内部调用检查链接是否有效 * *参数url是被检查的链接 *返回True表示链接正常有效 */ protected boolean checkLink(URL url) { try { URLConnection connection = url.openConnection(); connection.connect(); return true; } catch ( IOException e ) { return false; } } /** *当蜘蛛找到电子邮件地址时调用 * *参数email为找到的电子邮件地址 */ public void spiderFoundEMail(String email) { } /** *以线程安全方式更新错误信息的内部类 */ class UpdateErrors implements Runnable { public String msg; public void run() { errors.append(msg); } } /** *以线程安全方式更新当前状态信息 */ class UpdateCurrentStats implements Runnable { public String msg; public void run() { current.setText("当前进度:" + msg ); goodLinksLabel.setText("正常链接:" + goodLinksCount); badLinksLabel.setText("死链接:" + badLinksCount); } } }

 

0
相关文章