斯坦福 算法2 第二周作业
斯坦福 Algorithms: Design and Analysis 2 第二周作业
来自斯坦福网站的Algorithms: Design and Analysis,与目前coursera上的版本内容没有变化,不过时间安排略有不同。
1. Problem Set 2
可以有反例。但我貌似没咋想出来。错误的原因大概是因为用在了有向图上面。
2. Programming Assignment 2
问题1:用Kruskal算法实现课上讲的聚类算法。
代码如下:
#include <iostream>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <fstream>
#include <sstream>
#include <set>
#include <string>
#include <ctime>
#include <algorithm>
using namespace std;
#define INT_MAX 2147483647
void readGraph(string fileName,vector<vector<int>>& edges,int& size) {
cout << "read data from "<< fileName <<endl;
ifstream input(fileName);
string line;
int u,v,weight;
input >> size;
while(input >> u) {
input >> v >> weight;
edges.push_back({u,v,weight});
}
}
class UnionFind
{
vector<int> records;
int counts;
public:
UnionFind(int n){
for(int i = 0; i <= n; i++)
records.push_back(-1); //根节点指向的值为负数表示的rank,index 0是多余的
counts = n;
};
int Find(int x) {
int cur = x, root = records[cur];
if(root > 0){
root = Find(root);
records[cur] = root; //path comression
return root;
}else
return cur; //返回指向负数的结点,也就是根节点
}
void Union(int a, int b) {
int s1 = Find(a), s2 = Find(b);
if(s1 == s2) return;//同一集合无序合并
if(records[s1] < records[s2])//rank 不同时让rank小的指向rank大的,这里是负数表示所以反着写
records[s2] = s1;
else if(records[s2] < records[s1])
records[s1] = s2;
else{ //rank相同时候其中一个rank加一
records[s1] = s2;
records[s2] -= 1;
}
counts--;//合并后已有集合数量减一
}
int size() {
return counts;
}
};
int clustering(vector<vector<int>> edges, int n, int k) {
UnionFind uf = UnionFind(n);
auto cmp = [](vector<int>& a, vector<int>& b) {return a[2] < b[2];};
sort(edges.begin(),edges.end(),cmp);//按weight升序排列各个edge
int i = 0;
for(; i < edges.size() && uf.size() > k; i++) { //合并不同的集合直到只剩下k个
if(uf.Find(edges[i][0]) != uf.Find(edges[i][1]))
uf.Union(edges[i][0],edges[i][1]);
}
int res = 0;
for(int j = i; j < edges.size(); j++) {//找出剩下的跨越集合的边的最小weight作为结果
if(uf.Find(edges[j][0]) != uf.Find(edges[j][1])) {
res = edges[j][2];
break;
}
}
return res;
}
int main() {
string fileName = "clustering1.txt";
vector<vector<int>> edges;
int n;
readGraph(fileName,edges,n);
int res = clustering(edges,n,4);
cout << res << endl;
return 0;
}
答案是106。这里主要注意一下UnionFind的实现,用了Path Compression,leader结点指向的负值表示其集合的rank。
问题2:每个结点是个01表示的字符串,结点之间的距离是汉明距离。由于节点数太多不可能对所有的边进行排序。因此可以针对结点的特点来计算距离。
因为要最终的距离大于等于3,因此可以只合并距离小于3的边。其实挺好算的,每次改变结点中的某一位或者某两位就表示距离当前结点距离为1或2的另一个结点。
坑爹的地方是,这里读入的结点居然是有重复的。所以相当于还要考虑距离为0的边。很生气,debug很久才发现。我的处理是直接在输入时候去掉重复的结点。
#include <iostream>
#include <vector>
#include <unordered_map>
#include <unordered_set>
#include <fstream>
#include <sstream>
#include <set>
#include <string>
#include <ctime>
#include <algorithm>
using namespace std;
#define INT_MAX 2147483647
void readGraph(string fileName,vector<string>& nodes, unordered_map<string,int>& mp, int& size, int& length) {
cout << "read data from "<< fileName <<endl;
ifstream input(fileName);
string line;
int count=1;
nodes.push_back("");
input >> size >> length;
while(getline(input,line)) {
if(line.size() == 0 || mp.find(line) != mp.end())
continue;
length = line.size();//实际读取的每行长度,包括空格,未作处理
nodes.push_back(line);
mp[line] = count;
count++;
}
size = nodes.size()-1;
}
class UnionFind
{
vector<int> records;
int counts;
public:
UnionFind(int n){
for(int i = 0; i <= n; i++)
records.push_back(-1); //根节点指向的值为负数表示的rank,index 0是多余的
counts = n;
};
int Find(int x) {
int cur = x, root = records[cur];
if(root > 0){
root = Find(root);
records[cur] = root; //path comression
return root;
}else
return cur; //返回指向负数的结点,也就是根节点
}
void Union(int a, int b) {
int s1 = Find(a), s2 = Find(b);
if(s1 == s2) return;//同一集合无序合并
if(records[s1] < records[s2])//rank 不同时让rank小的指向rank大的,这里是负数表示所以反着写
records[s2] = s1;
else if(records[s2] < records[s1])
records[s1] = s2;
else{ //rank相同时候其中一个rank加一
records[s1] = s2;
records[s2] -= 1;
}
counts--;//合并后已有集合数量减一
}
int size() {
return counts;
}
};
int clustering(vector<string> nodes, unordered_map<string,int>& mp, int n, int length) {
UnionFind uf = UnionFind(n);
for(int i = 1; i <= n; i++) { //距离为1的边
string cur = nodes[i];
for(int k = 0; k < length; k+=2){
cur[k] = ('1'-cur[k])+'0';
if(mp.find(cur) != mp.end())
uf.Union(i,mp[cur]);
cur[k] = ('1'-cur[k])+'0';
}
if(i % 1000 == 0)
cout << "distance 1: "<< i << " uf.size(): "<< uf.size()<< endl;
}
for(int i = 1; i <= n; i++) { //距离为2的边
string cur = nodes[i];
for(int k = 0; k < length; k+=2){
cur[k] = ('1'-cur[k])+'0';
for(int l = k+2; l < length; l+=2){
cur[l] = ('1'-cur[l])+'0';
if(mp.find(cur) != mp.end())
uf.Union(i,mp[cur]);
cur[l] = ('1'-cur[l])+'0';
}
cur[k] = ('1'-cur[k])+'0';
}
if(i % 1000 == 0)
cout << "distance 2: "<< i << " uf.size(): "<< uf.size()<<endl;
}
return uf.size();
}
int main() {
string fileName = "clustering_big.txt";
vector<string> nodes;
unordered_map<string,int> mp;
int n,length;
readGraph(fileName,nodes,mp,n,length);
int res = clustering(nodes,mp,n,length);
cout << res << endl;
return 0;
}
答案是6118.