用Objective-C HTMLParser解析HTML文档
代码:https://github.com/zootreeves/Objective-C-HMTL-Parser
?
01
02
03
04
05
06
07
08
09
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
|
// Prerequisites:
// 1. Link libxml2 into the project (Build Phases -> Link Binary With Libraries -> +).
// 2. Add /usr/include/libxml2 to Header Search Paths.

// Download the page as UTF-8 text. The error is captured (instead of being
// discarded with nil) so a failed download is reported with its real cause
// rather than surfacing later as an opaque parser failure.
// NOTE: this call is synchronous; it blocks the calling thread until it returns.
NSError *error = nil;
NSURL *url = [NSURL URLWithString:@"http://vip.astro.sina.com.cn/astro/view/aries/day/20140808"];
NSString *str = [NSString stringWithContentsOfURL:url
                                         encoding:NSUTF8StringEncoding
                                            error:&error];
if (!str) {
    NSLog(@"%@", error);
    return;
}

// Parse the HTML document. The error variable is reset to nil first so the
// check below never reads a stale or uninitialized pointer.
error = nil;
HTMLParser *parser = [[HTMLParser alloc] initWithString:str error:&error];
// Both the returned object and the out-parameter are checked.
// NOTE(review): the parser may hand back a non-nil object and signal failure
// only through the error out-parameter — confirm against the library source.
if (!parser || error) {
    NSLog(@"%@", error);
    return;
}

// Get the <body> part of the document.
HTMLNode *node = [parser body];

// Look up nodes by attribute: every node whose "class" attribute matches
// "tab" (partial matches allowed).
NSArray *tabs = [node findChildrenWithAttribute:@"class" matchingName:@"tab" allowPartial:YES];
for (HTMLNode *n in tabs) {
    // Look up child nodes by tag name.
    HTMLNode *h5 = [n findChildTag:@"h5"];
    HTMLNode *img = [n findChildTag:@"img"];
    HTMLNode *p = [n findChildTag:@"p"];

    // Read an attribute value. A missing <img> is nil; messaging nil is a
    // no-op, so this simply logs "(null)".
    NSLog(@"%@", [img getAttributeNamed:@"src"]);

    // Read the nodes' contents.
    NSLog(@"%@: %@", [h5 contents], [p contents]);
}
|