词法分析器

先展示下效果:输入一段代码,程序会把其中的每个关键字、标识符、数字、运算符、括号等单词逐一切分出来,并连同该单词在预先编号的表中的编号一起打印出来。这张预先编号的表笔者称为"单词种别码表",具体内容见下文代码。
词法分析器

算法设计

词法分析器

开发环境

Windows10+记事本+JDK8

代码

import javax.swing.*;
import java.awt.*;
import java.awt.event.*;
import java.util.*;

/**
 * A small Swing front end for a hand-written lexical analyzer.
 *
 * <p>The user types source text into the upper text area and presses the
 * button; every token found is printed in the lower text area as
 * {@code (code, lexeme)}, where {@code code} comes from the token-category
 * table built by {@link #buildWordsTable()}.
 *
 * <p>The scanning itself lives in the static, GUI-free {@link #tokenize(String)}
 * so it can be exercised without creating any window.
 */
public class LexicalAnalysis{

	// ---- UI widgets ----
	JFrame jFrame = new JFrame("词法分析");               // main window
	JTextArea jTextAreaTop = new JTextArea(12, 60);       // input: source text to analyze
	JTextArea jTextAreaBottom1 = new JTextArea(12, 60);   // output: analysis result
	JButton button1 = new JButton("点我");                // triggers the analysis

	/** Token-category table: lexeme (or category name) -> category code. */
	private static final Map<String, String> WORDS_TABLE = buildWordsTable();

	/**
	 * Table entries that name a token category rather than a literal lexeme.
	 * A word spelled like one of these is an ordinary identifier, not a keyword.
	 */
	private static final Set<String> CATEGORY_NAMES =
			Collections.unmodifiableSet(new HashSet<>(Arrays.asList("id", "digit", "ERROR")));

	// Result of the most recent analysis, in input order (parallel lists).
	ArrayList<String> resultsKey = new ArrayList<>();
	ArrayList<String> resultsValue = new ArrayList<>();

	/** Creates the analyzer window components; call {@link #init()} to show them. */
	public LexicalAnalysis(){
		// The token-category table is a shared constant, so nothing to set up here.
	}

	/** Builds the read-only token-category table. */
	private static Map<String, String> buildWordsTable(){
		Map<String, String> table = new TreeMap<>();

		table.put("main", "1");
		table.put("int", "2");
		table.put("char", "3");
		table.put("if", "4");
		table.put("else", "5");
		table.put("for", "6");
		table.put("while", "7");
		table.put("id", "10");      // category: identifier
		table.put("digit", "20");   // category: number
		table.put("=", "21");
		table.put("+", "22");
		table.put("-", "23");
		table.put("*", "24");
		table.put("/", "25");
		table.put("(", "26");
		table.put(")", "27");
		table.put("[", "28");
		table.put("]", "29");
		table.put("{", "30");
		table.put("}", "31");
		table.put(",", "32");
		table.put(":", "33");
		table.put(";", "34");
		table.put(">", "35");
		table.put("<", "36");
		table.put(">=", "37");
		table.put("<=", "38");
		table.put("==", "39");
		table.put("!=", "40");
		table.put("!", "666");
		table.put("\0", "1000");
		table.put("ERROR", "-1");   // category: error

		return Collections.unmodifiableMap(table);
	}

	/**
	 * Program entry point.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){
		// Swing components must be created and shown on the event dispatch thread.
		SwingUtilities.invokeLater(() -> new LexicalAnalysis().init());
	}

	/** Lays out the window, wires the button and makes the window visible. */
	public void init(){
		// Wrap long lines instead of scrolling horizontally.
		jTextAreaTop.setLineWrap(true);
		jTextAreaBottom1.setLineWrap(true);

		JScrollPane jsp1 = new JScrollPane(jTextAreaTop);
		JScrollPane jsp2 = new JScrollPane(jTextAreaBottom1);

		// Vertical stack: input area, button row, output area.
		Box box1 = Box.createVerticalBox();
		box1.add(jsp1);
		// The button gets its own horizontal box so it does not move with the typed text.
		Box box2 = new Box(BoxLayout.X_AXIS);
		box2.add(button1);
		box1.add(box2);
		box1.add(jsp2);

		// Attach the listener before the window is shown so no click can be missed.
		button1.addActionListener(new MyListener());

		jFrame.add(box1);
		jFrame.setDefaultCloseOperation(JFrame.EXIT_ON_CLOSE);
		// pack() sizes the frame from its contents, so no explicit setSize is needed.
		jFrame.pack();
		jFrame.setVisible(true);
	}

	/**
	 * Splits {@code source} into tokens.
	 *
	 * <p>Rules, applied at each position in order:
	 * <ol>
	 *   <li>whitespace is skipped;</li>
	 *   <li>a letter starts a word of letters and digits, which is a keyword if the
	 *       table lists it as a lexeme and an identifier otherwise;</li>
	 *   <li>a digit starts a run of digits, which is a number; a letter directly
	 *       after the run is an error;</li>
	 *   <li>a character listed in the table is an operator or delimiter; it is merged
	 *       with a following {@code '='} only when the pair is itself in the table
	 *       ({@code >=}, {@code <=}, {@code ==}, {@code !=});</li>
	 *   <li>anything else is an error.</li>
	 * </ol>
	 *
	 * <p>The return type is written as {@code java.util.List} because the file's
	 * wildcard imports make the simple name {@code List} ambiguous with
	 * {@code java.awt.List}.
	 *
	 * @param source text to analyze; {@code null} is treated as empty
	 * @return tokens in input order, each a two-element array {@code {code, lexeme}}
	 * @throws IllegalArgumentException if a number is directly followed by a letter
	 *         or the text contains a character that starts no known token
	 */
	static java.util.List<String[]> tokenize(String source){
		java.util.List<String[]> tokens = new ArrayList<>();
		if(source == null){
			return tokens;
		}

		final int length = source.length();
		int pos = 0;
		while(pos < length){
			char c = source.charAt(pos);

			if(Character.isWhitespace(c)){
				pos++;
			}
			else if(Character.isLetter(c)){
				int end = pos + 1;
				while(end < length && Character.isLetterOrDigit(source.charAt(end))){
					end++;
				}
				String word = source.substring(pos, end);
				// "id", "digit" and "ERROR" are category names in the table, not keywords.
				boolean keyword = WORDS_TABLE.containsKey(word) && !CATEGORY_NAMES.contains(word);
				String code = keyword ? WORDS_TABLE.get(word) : WORDS_TABLE.get("id");
				tokens.add(new String[]{code, word});
				pos = end;
			}
			else if(Character.isDigit(c)){
				int end = pos + 1;
				while(end < length && Character.isDigit(source.charAt(end))){
					end++;
				}
				if(end < length && Character.isLetter(source.charAt(end))){
					throw new IllegalArgumentException("标识符不能以数字开头,编译出错");
				}
				tokens.add(new String[]{WORDS_TABLE.get("digit"), source.substring(pos, end)});
				pos = end;
			}
			else if(WORDS_TABLE.containsKey(String.valueOf(c))){
				String lexeme = String.valueOf(c);
				if(pos + 1 < length && source.charAt(pos + 1) == '='){
					String pair = source.substring(pos, pos + 2);
					// Merge only real two-character operators; "+=" stays "+" then "=".
					if(WORDS_TABLE.containsKey(pair)){
						lexeme = pair;
					}
				}
				tokens.add(new String[]{WORDS_TABLE.get(lexeme), lexeme});
				pos += lexeme.length();
			}
			else{
				throw new IllegalArgumentException("存在非法字符" + c);
			}
		}
		return tokens;
	}

	/** Runs the analysis on the input area and shows the outcome in the output area. */
	class MyListener implements ActionListener{

		@Override
		public void actionPerformed(ActionEvent event){
			resultsKey.clear();
			resultsValue.clear();
			jTextAreaBottom1.setText("");

			java.util.List<String[]> tokens;
			try{
				tokens = tokenize(jTextAreaTop.getText());
			}catch(IllegalArgumentException e){
				// Report the problem to the user instead of terminating the application.
				jTextAreaBottom1.setText(e.getMessage());
				return;
			}

			StringBuilder report = new StringBuilder();
			for(String[] token : tokens){
				resultsKey.add(token[0]);
				resultsValue.add(token[1]);
				report.append("(").append(token[0]).append(", ").append(token[1]).append(")\n");
			}
			jTextAreaBottom1.setText(report.toString());
		}
	}
}

说明

笔者仅对如下单词进行了识别(其中 id、digit、ERROR 不是字面单词,而分别是标识符、数字、出错这三个类别在表中的名称):

!
!=
(
)
*
+
,
-
/
:
;
<
<=
=
==
>
>=
ERROR
[
]
char
digit
else
for
id
if
int
main
while
{
}