C语言词法分析:C#源码
今天继续研究代码解析的算法。
下面是算法流程图,
有图解可能更直观一点。
以下是 C# 源码:
1
using System;
2
using System.IO;
3
using System.Text;
4
using System.Windows.Forms;
5
using System.Collections;
6
7
namespace CodeFormatter
{
8
/// <summary>
/// Lexical analyzer for C source text (ASCII only; non-ASCII characters are
/// reported as illegal characters).
/// Assign the text to <see cref="SourceCode"/>, call <see cref="Parse"/>, then
/// read the report from <see cref="ParseMessages"/>.
/// Every token line has the form "text TAB TAB TAB category TAB TAB TAB code",
/// where category is 1 = keyword, 2 = identifier, 3 = constant, 4 = operator or
/// delimiter, and code is the 1-based position of the token in that table.
/// The identifier table, the constant table and the report are never cleared,
/// so repeated <see cref="Parse"/> calls on one instance accumulate.
/// </summary>
public class CodeFormatterFactory
{
    /* Table selectors understood by find(). */
    private const int KeywordTable = 1;
    private const int IdentifierTable = 2;
    private const int ConstantTable = 3;
    private const int OperatorTable = 4;

    /* Commands understood by find(). */
    private const int LookupOnly = 1;
    private const int LookupOrAdd = 2;

    /* Source text to analyze. */
    private string sourceCode = "";

    /* The 32 C keywords. */
    private readonly ArrayList KeyWordList = new ArrayList();

    /* Operators and delimiters. */
    private readonly ArrayList LimitList = new ArrayList();

    /* Numeric constants, stored as binary digit strings. */
    private readonly ArrayList ConstList = new ArrayList();

    /* Identifiers, in order of first appearance. */
    private readonly ArrayList IdentifierList = new ArrayList();

    /* Report lines produced by Parse(). */
    private readonly ArrayList OutputList = new ArrayList();

    /// <summary>
    /// Creates an analyzer with the keyword and operator tables filled in.
    /// </summary>
    public CodeFormatterFactory()
    {
        init();
    }

    /// <summary>
    /// The source text that <see cref="Parse"/> scans.
    /// </summary>
    public string SourceCode
    {
        get { return this.sourceCode; }
        set { this.sourceCode = value; }
    }

    /// <summary>
    /// All report lines produced so far, each terminated by "\r\n".
    /// </summary>
    public string ParseMessages
    {
        get
        {
            // StringBuilder avoids the quadratic cost of repeated string concatenation.
            StringBuilder report = new StringBuilder();
            foreach (object entry in this.OutputList)
            {
                report.Append(entry).Append("\r\n");
            }
            return report.ToString();
        }
    }

    /// <summary>
    /// Fills the keyword and operator tables and empties the other tables.
    /// </summary>
    private void init()
    {
        /* The 32 C keywords. */
        string[] keywords = new string[]
        {
            "auto", "break", "case", "char", "const", "continue", "default", "do", "double",
            "else", "enum", "extern", "float", "for", "goto", "if", "int", "long", "register",
            "return", "short", "signed", "sizeof", "static", "struct", "switch", "typedef",
            "union", "unsigned", "void", "volatile", "while"
        };

        /* Operators and delimiters. */
        string[] operators = new string[]
        {
            "(", ")", "[", "]", "->", ".", "!", "++", "--", "&", "~",
            "*", "/", "%", "+", "-", "<<", ">>", "<", "<=", ">", ">=", "==", "!=", "&&", "||",
            "=", "+=", "-=", "*=", "/=", ",", ";", "{", "}", "#", "_", "'"
        };

        this.KeyWordList.Clear();
        this.KeyWordList.AddRange(keywords);

        this.LimitList.Clear();
        this.LimitList.AddRange(operators);

        this.ConstList.Clear();
        this.IdentifierList.Clear();
        this.OutputList.Clear();
    }

    /// <summary>
    /// Converts a string of decimal digits to its binary digit string.
    /// The conversion divides the decimal text by two repeatedly, so literals of
    /// any length are handled without overflow.
    /// </summary>
    /// <param name="buf">Decimal digits, e.g. "10".</param>
    /// <returns>
    /// The binary digits without leading zeros ("1010"), or "0" when the value is
    /// zero or <paramref name="buf"/> is null, empty or contains a non-digit.
    /// </returns>
    private static string dtb(string buf)
    {
        if (buf == null || buf.Length == 0)
        {
            return "0";
        }

        char[] digits = buf.ToCharArray();
        for (int k = 0; k < digits.Length; k++)
        {
            if (!isDigit(digits[k]))
            {
                return "0";
            }
        }

        StringBuilder bits = new StringBuilder();
        int first = 0;
        while (first < digits.Length && digits[first] == '0')
        {
            first++;
        }

        // Each pass halves the remaining decimal number in place; the remainder
        // of that division is the next (less significant first) binary digit.
        while (first < digits.Length)
        {
            int remainder = 0;
            for (int k = first; k < digits.Length; k++)
            {
                int current = remainder * 10 + (digits[k] - '0');
                digits[k] = (char)('0' + current / 2);
                remainder = current % 2;
            }
            bits.Insert(0, (char)('0' + remainder));

            while (first < digits.Length && digits[first] == '0')
            {
                first++;
            }
        }

        return bits.Length == 0 ? "0" : bits.ToString();
    }

    /// <summary>
    /// Returns the table selected by <paramref name="type"/>, or null when the
    /// selector is unknown.
    /// </summary>
    private ArrayList tableFor(int type)
    {
        switch (type)
        {
            case KeywordTable: return this.KeyWordList;
            case IdentifierTable: return this.IdentifierList;
            case ConstantTable: return this.ConstList;
            case OperatorTable: return this.LimitList;
            default: return null;
        }
    }

    /// <summary>
    /// Looks a token up in one of the tables and optionally appends it.
    /// Matching is exact and case-sensitive, as C itself is.
    /// </summary>
    /// <param name="buf">Token text to look up.</param>
    /// <param name="type">1 = keywords, 2 = identifiers, 3 = constants, 4 = operators.</param>
    /// <param name="command">1 = look up only; any other value appends a missing token.</param>
    /// <returns>
    /// The 1-based position of the token in the table; 0 when it is absent and
    /// <paramref name="command"/> is 1, or when <paramref name="type"/> is unknown.
    /// </returns>
    private int find(string buf, int type, int command)
    {
        ArrayList table = tableFor(type);
        if (table == null)
        {
            return 0;
        }

        // Positions are 1-based so that 0 can unambiguously mean "not found",
        // and so that a lookup hit agrees with the position returned on insert.
        int index = table.IndexOf(buf);
        if (index >= 0)
        {
            return index + 1;
        }

        if (command == LookupOnly)
        {
            return 0;
        }

        table.Add(buf);
        return table.Count;
    }

    /// <summary>
    /// Records a numeric constant: registers its binary form in the constant
    /// table and appends a category 3 line to the report.
    /// </summary>
    private void cs_manage(string buffer)
    {
        string binary = dtb(buffer);
        int result = find(binary, ConstantTable, LookupOrAdd);
        this.OutputList.Add(String.Format("{0}\t\t\t3\t\t\t{1}", buffer, result));
    }

    /// <summary>
    /// Records a word: a keyword yields a category 1 line; anything else is
    /// registered in the identifier table and yields a category 2 line.
    /// </summary>
    private void ch_manage(string buffer)
    {
        int result = find(buffer, KeywordTable, LookupOnly);
        if (result != 0)
        {
            this.OutputList.Add(String.Format("{0}\t\t\t1\t\t\t{1}", buffer, result));
        }
        else
        {
            result = find(buffer, IdentifierTable, LookupOrAdd);
            this.OutputList.Add(String.Format("{0}\t\t\t2\t\t\t{1}", buffer, result));
        }
    }

    /// <summary>
    /// Appends an error line naming the offending character and its line number.
    /// </summary>
    private void er_manage(char error, int lineno)
    {
        this.OutputList.Add(String.Format("错误关键字: {0} ,所在行: {1}", error, lineno));
    }

    /// <summary>True for ASCII digits.</summary>
    private static bool isDigit(char ch)
    {
        return ch >= '0' && ch <= '9';
    }

    /// <summary>True for ASCII letters and the underscore.</summary>
    private static bool isIdentifierStart(char ch)
    {
        return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z') || ch == '_';
    }

    /// <summary>True for ASCII letters, digits and the underscore.</summary>
    private static bool isIdentifierPart(char ch)
    {
        return isIdentifierStart(ch) || isDigit(ch);
    }

    /// <summary>
    /// Scans <see cref="SourceCode"/> from start to end and appends one report
    /// line per token, one line per error, and a final line with the error count.
    /// Block comments and the contents of string literals are skipped; an
    /// unterminated comment or string literal is reported as an error at the
    /// line where it starts. A null <see cref="SourceCode"/> is treated as empty.
    /// </summary>
    public void Parse()
    {
        string text = (this.sourceCode == null) ? "" : this.sourceCode;
        int length = text.Length;
        int pos = 0;        // index of the next unread character
        int line = 1;       // current 1-based line number
        int errorno = 0;    // number of errors reported

        while (pos < length)
        {
            char ch = text[pos];

            if (isIdentifierStart(ch))
            {
                /* Keyword or identifier. */
                int start = pos;
                while (pos < length && isIdentifierPart(text[pos]))
                {
                    pos++;
                }
                ch_manage(text.Substring(start, pos - start));
            }
            else if (isDigit(ch))
            {
                /* Run of decimal digits. */
                int start = pos;
                while (pos < length && isDigit(text[pos]))
                {
                    pos++;
                }
                cs_manage(text.Substring(start, pos - start));
            }
            else if (ch == ' ' || ch == '\t' || ch == '\r')
            {
                /* Blanks; '\r' is included so that "\r\n" line ends are not errors. */
                pos++;
            }
            else if (ch == '\n')
            {
                line++;
                pos++;
            }
            else if (ch == '/')
            {
                char next = (pos + 1 < length) ? text[pos + 1] : '\0';
                if (next == '=')
                {
                    this.OutputList.Add(String.Format("/=\t\t\t4\t\t\t{0}",
                        find("/=", OperatorTable, LookupOnly)));
                    pos += 2;
                }
                else if (next == '*')
                {
                    /* Block comment: skip through the first closing marker. */
                    int startLine = line;
                    int close = text.IndexOf("*/", pos + 2, StringComparison.Ordinal);
                    int end = (close < 0) ? length : close + 2;
                    for (int k = pos; k < end; k++)
                    {
                        if (text[k] == '\n')
                        {
                            line++;
                        }
                    }
                    if (close < 0)
                    {
                        // Unterminated comment: report it instead of scanning forever.
                        er_manage(ch, startLine);
                        errorno++;
                    }
                    pos = end;
                }
                else
                {
                    /* Plain division operator. */
                    this.OutputList.Add(String.Format("/\t\t\t4\t\t\t{0}",
                        find("/", OperatorTable, LookupOnly)));
                    pos++;
                }
            }
            else if (ch == '"')
            {
                /* String literal: report both quotes, skip what is between them. */
                int startLine = line;
                bool closed = false;
                this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t37", ch));
                pos++;
                while (pos < length && !closed)
                {
                    char current = text[pos++];
                    if (current == '\\' && pos < length)
                    {
                        // A backslash escapes the next character, so \" does not close.
                        if (text[pos] == '\n')
                        {
                            line++;
                        }
                        pos++;
                    }
                    else if (current == '\n')
                    {
                        line++;
                    }
                    else if (current == '"')
                    {
                        closed = true;
                    }
                }
                if (closed)
                {
                    this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t37", ch));
                }
                else
                {
                    er_manage(ch, startLine);
                    errorno++;
                }
            }
            else
            {
                /* Operator, delimiter or illegal character. */
                bool atEnd = pos + 1 >= length;
                int result;

                if (!atEnd)
                {
                    /* Prefer a two-character operator when one matches. */
                    string pair = text.Substring(pos, 2);
                    result = find(pair, OperatorTable, LookupOnly);
                    if (result != 0)
                    {
                        this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t{1}", pair, result));
                        pos += 2;
                        continue;
                    }
                }

                string single = text.Substring(pos, 1);
                result = find(single, OperatorTable, LookupOnly);
                if (result == 0)
                {
                    er_manage(ch, line);
                    errorno++;
                }
                else if (atEnd)
                {
                    this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t{1}", single, result));
                }
                else
                {
                    // The trailing tab is kept for compatibility with the existing report format.
                    this.OutputList.Add(String.Format("{0}\t\t\t4\t\t\t{1}\t", single, result));
                }
                pos++;
            }
        }

        /* Report the number of errors. */
        this.OutputList.Add(String.Format("\n共有 {0} 个错误.\n", errorno));
    }
}
356
}
357
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
代码可能似曾相识,因为它是参考一份 C 语言实现改写而来的。
这里下载工程源码(带C代码)
2005年4月22日 S.F.
出处:http://www.cnblogs.com/chinasf/archive/2005/04/22/143449.html