在 Accord.NET 中如何使用 One-Class SVM 进行异常检测?
我正在尝试使用 Accord.NET 中的 OneclassSupportVectorLearning 来实现异常检测,但在训练过程中遇到了 NullReference 错误。以下是我测试时使用的示例代码。如果有人能帮我解决这个问题,我将不胜感激。问题:在 Accord.NET 中如何使用 One-Class SVM 进行异常检测?
// Minimal reproduction from the question: train a one-class learner on five samples,
// each with four features. The trailing "// 0" markers are kept from the original post;
// presumably they denote the (single) class of each row — TODO confirm.
double[][] inputs =
{
new double[] { 0, 1, 1, 0 }, // 0
new double[] { 0, 1, 0, 0 }, // 0
new double[] { 0, 0, 1, 0 }, // 0
new double[] { 0, 1, 1, 0 }, // 0
new double[] { 0, 1, 0, 0 }, // 0
};
// The teacher is constructed with two type arguments (ChiSquare, double[]) and no further configuration.
var oteacher = new OneclassSupportVectorLearning<ChiSquare,double[]>();
var k = oteacher.Learn(inputs); // The reported NullReferenceException is thrown by this call.
编辑------------------------------------------ ---------------------------
根据 Jstreet 的评论,我尝试了下面的代码:它在二维数据上可以正常工作,但在更高维度上会失败。
/// <summary>
/// Trains a one-class SVM on 1000 random four-dimensional samples whose components are
/// integers in [45, 55), then prints, for ten fixed test points, whether the trained
/// machine accepts the point ("OK") or rejects it ("Anomaly").
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int sampleCount = 1000;
    const int dimensions = 4;
    const int lower = 45;
    const int upper = 55;

    Random rng = new Random(DateTime.Now.Millisecond);

    // Build the training set row by row; components are drawn left to right.
    double[][] inputs = new double[sampleCount][];
    for (int row = 0; row < inputs.Length; row++)
    {
        double[] sample = new double[dimensions];
        for (int col = 0; col < dimensions; col++)
        {
            sample[col] = rng.Next(lower, upper);
        }

        inputs[row] = sample;
    }

    var teacher = new OneclassSupportVectorLearning<ChiSquare>();
    var machine = teacher.Learn(inputs);

    double[][] test =
    {
        // normal
        new double[] { 50, 53, 50, 50 },
        new double[] { 49, 52, 50, 50 },
        new double[] { 48, 51, 50, 50 },
        new double[] { 47, 52, 50, 50 },
        new double[] { 46, 53, 50, 50 },
        // anomalies
        new double[] { 50, 70, 70, 70 },
        new double[] { 51, 69, 70, 70 },
        new double[] { 52, 68, 70, 70 },
        new double[] { 53, 67, 70, 70 },
        new double[] { 54, 66, 70, 70 },
    };

    foreach (double[] point in test)
    {
        // A false decision means the machine rejected the point.
        if (!machine.Decide(point))
        {
            Console.WriteLine(" Anomaly = {0}, {1}, {2}, {3}", point[0], point[1], point[2], point[3]);
            continue;
        }

        Console.WriteLine(" OK = {0}, {1}, {2}, {3}", point[0], point[1], point[2], point[3]);
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
我建议你先用一个二维数据集做实验,这样可以直观地看到结果,并对算法的行为有一些感觉:
/// <summary>
/// Trains a one-class SVM on 100 random two-dimensional samples whose components are
/// integers in [45, 55), then prints, for ten fixed test points, whether the trained
/// machine accepts the point ("OK") or rejects it ("Anomaly").
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int sampleCount = 100;
    const int dimensions = 2;
    const int lower = 45;
    const int upper = 55;

    Random rng = new Random(DateTime.Now.Millisecond);

    // Build the training set row by row; components are drawn left to right.
    double[][] inputs = new double[sampleCount][];
    for (int row = 0; row < inputs.Length; row++)
    {
        double[] sample = new double[dimensions];
        for (int col = 0; col < dimensions; col++)
        {
            sample[col] = rng.Next(lower, upper);
        }

        inputs[row] = sample;
    }

    var teacher = new OneclassSupportVectorLearning<ChiSquare>();
    var machine = teacher.Learn(inputs);

    double[][] test =
    {
        // normal
        new double[] { 50, 53 },
        new double[] { 49, 52 },
        new double[] { 48, 51 },
        new double[] { 47, 52 },
        new double[] { 46, 53 },
        // anomalies
        new double[] { 50, 70 },
        new double[] { 51, 69 },
        new double[] { 52, 68 },
        new double[] { 53, 67 },
        new double[] { 54, 66 },
    };

    foreach (double[] point in test)
    {
        // A false decision means the machine rejected the point.
        if (!machine.Decide(point))
        {
            Console.WriteLine(" Anomaly = {0}, {1}", point[0], point[1]);
            continue;
        }

        Console.WriteLine(" OK = {0}, {1}", point[0], point[1]);
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
此示例代码生成以下输出:
OK = 50, 53
OK = 49, 52
OK = 48, 51
OK = 47, 52
OK = 46, 53
Anomaly = 50, 70
Anomaly = 51, 69
Anomaly = 52, 68
Anomaly = 53, 67
Anomaly = 54, 66
这是相同结果的图形视图:
编辑:正如我所说,这需要做一些试验。下面是我在四维输入数据集上得到的结果。注意,我减小了每个维度的变化范围,并保持输入样本数不变,仍为 100:
/// <summary>
/// Trains a one-class SVM on 100 random four-dimensional samples whose components are
/// integers in [45, 50), then evaluates ten random test points: five drawn from the
/// training range and five drawn from [60, 65). Prints "OK" or "Anomaly" for each.
/// </summary>
/// <param name="args">Command-line arguments (unused).</param>
static void Main(string[] args)
{
    const int sampleCount = 100;
    const int dimensions = 4;
    const int normalLow = 45;
    const int normalHigh = 50;
    const int anomalyLow = 60;
    const int anomalyHigh = 65;
    const int normalTestCount = 5;
    const int anomalyTestCount = 5;

    Random rng = new Random(DateTime.Now.Millisecond);

    // Build the training set row by row; components are drawn left to right.
    double[][] inputs = new double[sampleCount][];
    for (int row = 0; row < inputs.Length; row++)
    {
        double[] sample = new double[dimensions];
        for (int col = 0; col < dimensions; col++)
        {
            sample[col] = rng.Next(normalLow, normalHigh);
        }

        inputs[row] = sample;
    }

    var teacher = new OneclassSupportVectorLearning<ChiSquare>();
    var machine = teacher.Learn(inputs);

    // Test points are drawn after training, from the same generator:
    // first the "normal" rows, then the "anomaly" rows.
    double[][] test = new double[normalTestCount + anomalyTestCount][];
    for (int row = 0; row < test.Length; row++)
    {
        bool fromNormalRange = row < normalTestCount;
        int low = fromNormalRange ? normalLow : anomalyLow;
        int high = fromNormalRange ? normalHigh : anomalyHigh;

        double[] point = new double[dimensions];
        for (int col = 0; col < dimensions; col++)
        {
            point[col] = rng.Next(low, high);
        }

        test[row] = point;
    }

    foreach (double[] point in test)
    {
        // A false decision means the machine rejected the point.
        if (!machine.Decide(point))
        {
            Console.WriteLine("Anomaly = {0}, {1}, {2}, {3}", point[0], point[1], point[2], point[3]);
            continue;
        }

        Console.WriteLine("OK = {0}, {1}, {2}, {3}", point[0], point[1], point[2], point[3]);
    }

    // Keep the console window open until the user presses Enter.
    Console.ReadLine();
}
结果如下:
OK = 49, 46, 47, 49
OK = 49, 45, 45, 47
OK = 45, 45, 46, 47
OK = 47, 49, 47, 48
OK = 45, 45, 47, 48
Anomaly = 62, 60, 61, 63
Anomaly = 61, 63, 63, 64
Anomaly = 64, 60, 60, 64
Anomaly = 61, 64, 63, 63
Anomaly = 62, 60, 62, 62
再次感谢Jstreet。但是当我尝试增加数据维度时,它似乎无法正确预测真/假。 – alinm
尝试增加输入中的数据点数量(即 **size**)。 – jsanalytics
我已经尝试了 5000 个输入样本,但使用我发布的示例代码,结果仍然全部为 false。是不是 OneclassSupportVectorLearning 还不够成熟? – alinm
请更新到版本 **3.4.2-alpha**。 – jsanalytics
谢谢jstress!我尝试了最新的预发布版本,现在它可以运行了。学习完成后,我把训练集传回去调用了“k.Decide(inputs)”方法,但它对整个训练集返回的全部是“false”(本应全部为“true”)。我检查了这里的代码,根据在线文档它应该是可以正常工作的。如果您能分享更多相关的提示,我将不胜感激。 – alinm