简单的网络邮箱抓取工具(附源码)

网络爬虫:搜索引擎为了让自己的数据库足够强大,需要没日没夜地在网络上寻找信息,以使自己的信息更全面。大家都知道互联网上的信息是无穷的,并且呈爆炸式增长,搜索引擎不可能靠手工去索取信息,于是人们编写了一个个小程序,让它们不停地在网络上获取信息,网络爬虫便由此产生了。

下面我用 Java 实现了一个简单的、专门抓取邮箱的小工具,做得非常粗略,仅供大家参考。

这是效果图

 

简单的网络邮箱抓取工具(附源码)

 啥也不说了直接上代码吧

 

import java.awt.BorderLayout;
import java.awt.Dimension;
import java.awt.Image;
import java.awt.MenuItem;
import java.awt.PopupMenu;
import java.awt.Toolkit;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.awt.event.WindowAdapter;
import java.awt.event.WindowEvent;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.swing.ImageIcon;
import javax.swing.JButton;
import javax.swing.JComboBox;
import javax.swing.JFrame;
import javax.swing.JLabel;
import javax.swing.JOptionPane;
import javax.swing.JPanel;
import javax.swing.JScrollPane;
import javax.swing.JTextArea;
import javax.swing.JTextField;
import javax.swing.SwingUtilities;
import javax.swing.UIManager;
import javax.swing.UnsupportedLookAndFeelException;
/**
 * Small Swing tool that queries a web search engine for a keyword, follows the
 * links found on the result page (crawl depth 2) and lists every e-mail
 * address it sees in the text area.
 *
 * <p>Threading model: all Swing components and the {@link #count} counter are
 * touched only on the event dispatch thread (EDT). Network fetches run on
 * background threads and are cancelled cooperatively through a generation
 * counter rather than {@code Thread.stop()}.
 *
 * @author http://javaflex.iteye.com/
 */
public class MainFrm extends JFrame implements ActionListener {

	private static final long serialVersionUID = 1L;

	/*
	 * Action commands. A button's/menu item's label doubles as its action
	 * command, so the label and the comparison in actionPerformed must be the
	 * very same string -- hence the shared constants.
	 */
	private static final String CMD_EXTRACT = "提取邮箱";
	private static final String CMD_STOP = "停止抓取";
	private static final String CMD_OPEN = "1.打  开";
	private static final String CMD_EXIT = "2.退  出";
	private static final String CMD_ABOUT = "3.关  于";

	/** Matches addresses of the form {@code local@host.tld[.tld...]}. */
	private static final Pattern EMAIL_PATTERN = Pattern
			.compile("[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\\.[a-zA-Z0-9_-]+)+");

	/**
	 * Matches http/https/ftp links. Intentionally left unchanged; note that
	 * the {@code &amp;} sequence inside the character classes also admits
	 * {@code ';'}, so HTML-escaped ampersands inside an href still match.
	 */
	private static final Pattern URL_PATTERN = Pattern
			.compile("(http|ftp|https):\\/\\/[\\w\\-_]+(\\.[\\w\\-_]+)+([\\w\\-\\.,@?^=%&amp;:/~\\+#]*[\\w\\-\\@?^=%&amp;/~\\+#])?");

	/** Connect/read timeout so a dead host cannot block a worker forever. */
	private static final int TIMEOUT_MILLIS = 15000;

	/** Running number of the next address to display; EDT only. */
	static int count=1;
	static int countUrl=1;
	JFrame frame;
	JButton b1;
	JButton b2;
	JTextArea t1;
	JTextField tf;
	JPanel panel;
	JScrollPane jScrollPane1;
	JLabel label;
	JComboBox comb;
	PopupMenu pm;
	/** Threads of the current crawl; synchronized because workers register themselves. */
	List<Thread> t = Collections.synchronizedList(new ArrayList<Thread>());
	/** Rough count of outstanding fetches (legacy bookkeeping, guarded by the class lock). */
	static int m = 0;

	/**
	 * Identifies the current crawl. Written only on the EDT; background work
	 * compares its own captured value against it and quits on mismatch.
	 */
	private volatile int generation;

	/** The single tray icon once installed, otherwise {@code null}. */
	private java.awt.TrayIcon trayIcon;

	/**
	 * Builds and shows the main window.
	 *
	 * @return this instance, to allow call chaining
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" }) // raw JComboBox kept for the field's declared type
	MainFrm into() {
		pm = new PopupMenu();
		MenuItem openItem = new MenuItem(CMD_OPEN);
		MenuItem closeItem = new MenuItem(CMD_EXIT);
		MenuItem aboutItem = new MenuItem(CMD_ABOUT);
		openItem.addActionListener(this);
		closeItem.addActionListener(this);
		aboutItem.addActionListener(this);
		pm.add(openItem);
		pm.add(closeItem);
		pm.add(aboutItem);
		String[] petStrings = { "Baidu", "Google", "Yahoo", "Bing", "Sogou" };
		comb = new JComboBox(petStrings);
		panel = new JPanel();
		tf = new JTextField(50);
		tf.setText("留下邮箱");
		label = new JLabel("关键字:");
		frame = new JFrame("邮箱抓取(注:抓取深度暂时默认为2)  QQ:三二八二四七六七六");
		// The icon is optional: a missing resource must not prevent start-up.
		java.net.URL imgURL = MainFrm.class.getResource("mail.png");
		if (imgURL != null) {
			frame.setIconImage(new ImageIcon(imgURL).getImage());
		}
		b1 = new JButton(CMD_EXTRACT);
		b1.addActionListener(this);
		b2 = new JButton(CMD_STOP);
		b2.addActionListener(this);
		t1 = new JTextArea();
		t1.setLineWrap(true);
		jScrollPane1 = new JScrollPane(t1);
		jScrollPane1.setPreferredSize(new Dimension(200, 200));
		this.setDefaultCloseOperation(DO_NOTHING_ON_CLOSE);
		frame.addWindowListener(new WindowAdapter() {
			@Override
			public void windowClosing(WindowEvent e) {
				System.exit(0);
			}

			@Override
			public void windowIconified(WindowEvent e) {
				systemTray();
				// Hide the window only if there is a tray icon to bring it back.
				if (trayIcon != null) {
					frame.setVisible(false);
				}
			}
		});
		panel.add(label);
		panel.add(tf);
		panel.add(comb);
		panel.add(b1);
		panel.add(b2);
		frame.getContentPane().add(panel, BorderLayout.NORTH);
		frame.getContentPane().add(jScrollPane1, BorderLayout.CENTER);
		frame.pack();
		frame.setVisible(true);
		Dimension winSize = Toolkit.getDefaultToolkit().getScreenSize();
		frame.setLocation((winSize.width - frame.getWidth()) / 2,
				(winSize.height - frame.getHeight()) / 2);
		frame.setAlwaysOnTop(true);
		return this;
	}

	/**
	 * Program entry point: applies the system look and feel and opens the
	 * window on the event dispatch thread.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args) throws ClassNotFoundException,
			InstantiationException, IllegalAccessException,
			UnsupportedLookAndFeelException {
		UIManager.setLookAndFeel(UIManager.getSystemLookAndFeelClassName());
		SwingUtilities.invokeLater(new Runnable() {
			@Override
			public void run() {
				new MainFrm().into().systemTray();
			}
		});
	}

	/**
	 * Dispatches button and tray-menu commands.
	 *
	 * @param e the event whose action command selects the operation
	 */
	@Override
	public void actionPerformed(ActionEvent e) {
		String command = e.getActionCommand();
		if (CMD_EXTRACT.equals(command)) {
			startExtraction();
		} else if (CMD_STOP.equals(command)) {
			stopExtraction();
		} else if (CMD_OPEN.equals(command)) {
			frame.setVisible(true);
			frame.setExtendedState(JFrame.NORMAL);
		} else if (CMD_EXIT.equals(command)) {
			System.exit(0);
		} else if (CMD_ABOUT.equals(command)) {
			JOptionPane.showMessageDialog(null, "本程序仅供初学参考 QQ:三二八二四七六七六");
		}
	}

	/**
	 * Cancels any running crawl and starts a new one for the current keyword.
	 * The search-result page is fetched on a coordinator thread so that the
	 * EDT is never blocked by network I/O; every result line that contains
	 * links then gets its own worker thread.
	 */
	@SuppressWarnings("rawtypes") // get()/pr() expose raw Map in their signatures
	private void startExtraction() {
		stopExtraction();
		count = 1;
		t1.setText("");
		final String searchUrl = buildSearchUrl(comb.getSelectedIndex(), tf.getText());
		final int crawl = generation;
		Thread coordinator = new Thread(new Runnable() {
			@Override
			public void run() {
				List<Map> lines = fetch(searchUrl, crawl);
				synchronized (MainFrm.class) {
					m = lines.size();
				}
				for (int i = 0, n = lines.size(); i < n && crawl == generation; i++) {
					final Map links = lines.get(i);
					if (links.isEmpty()) {
						continue; // nothing to follow on this line
					}
					Thread worker = new Thread(new Runnable() {
						@Override
						public void run() {
							Iterator iterator = links.values().iterator();
							while (crawl == generation && iterator.hasNext()) {
								fetch((String) iterator.next(), crawl);
							}
						}
					});
					t.add(worker);
					worker.start();
				}
			}
		});
		t.add(coordinator);
		coordinator.start();
	}

	/**
	 * Asks every thread of the current crawl to finish. Bumping the generation
	 * makes the loops in the workers terminate and makes results that are
	 * still queued for display get dropped.
	 */
	private void stopExtraction() {
		generation++; // only ever written on the EDT
		synchronized (t) {
			for (Thread worker : t) {
				worker.interrupt();
			}
			t.clear();
		}
	}

	/**
	 * Returns the search URL for the given engine and keyword.
	 *
	 * @param engineIndex index into the engine combo box (0 Baidu, 1 Google,
	 *                    2 Yahoo, 3 Bing, 4 Sogou); anything else means Baidu
	 * @param keyword     raw keyword; percent-encoded as UTF-8 so that spaces
	 *                    and non-ASCII characters yield a valid URL
	 * @return the complete search URL
	 */
	static String buildSearchUrl(int engineIndex, String keyword) {
		String prefix;
		switch (engineIndex) {
		case 1:
			prefix = "http://www.google.com.hk/search?num=50&q=";
			break;
		case 2:
			prefix = "http://www.yahoo.cn/s?q=";
			break;
		case 3:
			prefix = "http://cn.bing.com/search?q=";
			break;
		case 4:
			prefix = "http://www.sogou.com/web?query=";
			break;
		case 0:
		default:
			prefix = "http://www.baidu.com/s?wd=";
			break;
		}
		String text = keyword == null ? "" : keyword;
		try {
			return prefix + URLEncoder.encode(text, "UTF-8");
		} catch (UnsupportedEncodingException ignored) {
			// Cannot happen: every Java platform is required to support UTF-8.
			return prefix + text;
		}
	}

	/**
	 * Downloads a page and scans it line by line: e-mail addresses are shown
	 * in the text area, links are collected.
	 *
	 * @param urlStr address of the page to download
	 * @return one map (link to link) per line read; empty if the page could
	 *         not be read
	 */
	@SuppressWarnings("rawtypes")
	public List<Map> get(String urlStr) {
		return fetch(urlStr, generation);
	}

	/**
	 * Implementation of {@link #get(String)} bound to one crawl generation.
	 *
	 * @param urlStr address of the page to download
	 * @param crawl  generation this fetch belongs to; reading stops as soon as
	 *               it is no longer the current one
	 * @return one link map per line read so far
	 */
	@SuppressWarnings("rawtypes")
	private List<Map> fetch(String urlStr, int crawl) {
		List<Map> list = new ArrayList<Map>();
		HttpURLConnection connection = null;
		BufferedReader reader = null;
		try {
			URLConnection raw = new URL(urlStr).openConnection();
			if (!(raw instanceof HttpURLConnection)) {
				// URL_PATTERN also yields ftp:// links, which are not HTTP connections.
				return list;
			}
			connection = (HttpURLConnection) raw;
			connection.setConnectTimeout(TIMEOUT_MILLIS);
			connection.setReadTimeout(TIMEOUT_MILLIS);
			connection.setRequestProperty("User-Agent",
					"Mozilla/4.0 (compatible; MSIE 5.0; Windows NT; DigExt)");
			reader = new BufferedReader(new InputStreamReader(
					connection.getInputStream()));
			String line;
			while (crawl == generation && (line = reader.readLine()) != null) {
				list.add(scan(line, crawl));
			}
		} catch (IOException ignored) {
			// Best-effort crawl: unreachable, missing or malformed pages are skipped.
		} finally {
			if (reader != null) {
				try {
					reader.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails.
				}
			}
			if (connection != null) {
				connection.disconnect();
			}
			synchronized (MainFrm.class) {
				m--;
			}
		}
		return list;
	}

	/**
	 * Scans one line of text: found e-mail addresses are appended to the text
	 * area, found links are returned.
	 *
	 * @param aa the text to scan
	 * @return map whose keys and values are the links found in {@code aa}
	 */
	@SuppressWarnings("rawtypes")
	public Map pr(String aa) {
		return scan(aa, generation);
	}

	/**
	 * Implementation of {@link #pr(String)} bound to one crawl generation.
	 * Display happens on the EDT because neither Swing components nor the
	 * {@link #count} counter may be touched from worker threads.
	 */
	private Map<String, String> scan(String line, final int crawl) {
		final List<String> emails = extractEmails(line);
		if (!emails.isEmpty()) {
			SwingUtilities.invokeLater(new Runnable() {
				@Override
				public void run() {
					if (crawl != generation || t1 == null) {
						return; // crawl was stopped/restarted, or no window exists
					}
					for (String email : emails) {
						t1.append("第"+(count++)+"个:"+email + "\r\n");
					}
				}
			});
		}
		return extractLinks(line);
	}

	/**
	 * Returns the distinct e-mail addresses in {@code text}, in order of first
	 * appearance.
	 *
	 * @param text text to scan; {@code null} is treated as empty
	 * @return the addresses found, possibly empty, never {@code null}
	 */
	static List<String> extractEmails(String text) {
		List<String> found = new ArrayList<String>();
		if (text == null) {
			return found;
		}
		Matcher matcher = EMAIL_PATTERN.matcher(text);
		while (matcher.find()) {
			String email = matcher.group();
			if (!found.contains(email)) {
				found.add(email);
			}
		}
		return found;
	}

	/**
	 * Returns the distinct links in {@code text}, each mapped to itself.
	 *
	 * @param text text to scan; {@code null} is treated as empty
	 * @return the links found, possibly empty, never {@code null}
	 */
	static Map<String, String> extractLinks(String text) {
		Map<String, String> found = new HashMap<String, String>();
		if (text == null) {
			return found;
		}
		Matcher matcher = URL_PATTERN.matcher(text);
		while (matcher.find()) {
			found.put(matcher.group(), matcher.group());
		}
		return found;
	}

	/**
	 * Installs the tray icon with the popup menu. Does nothing if the icon is
	 * already installed, if the platform has no system tray, or if the icon
	 * resource is missing.
	 */
	public void systemTray() {
		if (trayIcon != null || !java.awt.SystemTray.isSupported()) {
			return; // never add a second icon
		}
		try {
			java.net.URL iconUrl = getClass().getResource("email_go.png");
			if (iconUrl == null) {
				return;
			}
			Image image = Toolkit.getDefaultToolkit().getImage(iconUrl);
			java.awt.TrayIcon icon = new java.awt.TrayIcon(image);
			icon.setToolTip("邮箱抓取");
			icon.setPopupMenu(pm);
			java.awt.SystemTray.getSystemTray().add(icon);
			trayIcon = icon; // remembered only once it is really in the tray
		} catch (Exception ignored) {
			// The tray is a convenience only; without it the window simply
			// stays in the task bar (see windowIconified).
		}
	}

	/**
	 * Returns a short description of this object. Free of side effects, so it
	 * is safe to call from debuggers and log statements.
	 */
	@Override
	public String toString() {
		return getClass().getName();
	}

}

 @author http://javaflex.iteye.com/

 

自动发送邮件的功能(待续)