Java 将NCBI GEO DataSets中DataSet类型的检索结果转化为Excel
检索结果举例
为进行生信分析,需要将检索结果转化为Excel,并以各字段标签作为列名。首先通过Send to → File将检索结果导出为TXT格式,如下:
通过下面的程序进一步转为Excel(使用jxl包):
源代码:
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package ncbi;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import jxl.Workbook;
import jxl.write.Label;
import jxl.write.WritableSheet;
import jxl.write.WritableWorkbook;
/**
 * Converts a GEO DataSets search result, exported as plain text, into an Excel workbook.
 *
 * <p>The workbook gets four sheets ("DataSet", "Series", "Platform", "Sample"). Only the
 * "DataSet" sheet receives data rows; "Series" receives a header row and the remaining two
 * sheets are left empty.
 *
 * <p>NOTE(review): the parser assumes every record is a run of seven consecutive lines
 * (numbered title, summary, organism, type, platform/series/samples, FTP download,
 * accession/ID) and that records are separated by blank lines -- confirm against a current
 * export before relying on it.
 *
 * @author wgyklh
 */
public class NCBI {

    /** Input path used when none is given on the command line. */
    private static final String DEFAULT_INPUT = "C:\\Users\\Lenovo\\Desktop\\gds_result.txt";

    /** Output path used when none is given on the command line. */
    private static final String DEFAULT_OUTPUT = "C:\\Users\\Lenovo\\Desktop\\gds.result.xls";

    /** Column titles of the "DataSet" sheet, in column order. */
    private static final String[] DATASET_HEADERS = {
        "Number", "Title", "Introduction", "Organism", "Type", "Platform",
        "Series", "Sample", "FTP download", "Accession", "ID"
    };

    /** Column titles of the "Series" sheet, in column order. */
    private static final String[] SERIES_HEADERS = {
        "Number", "Title", "Introduction", "Organism", "Type", "Platform",
        "Sample", "FTP download", "Accession", "ID"
    };

    /** Number of consecutive text lines that make up one record. */
    private static final int LINES_PER_RECORD = 7;

    /**
     * Reads the exported text file and writes one "DataSet" row per record.
     *
     * <p>All records up to the end of the input are converted. The title line of each record
     * is echoed to standard output as a progress indicator.
     *
     * @param args optional; {@code args[0]} overrides the input text file and {@code args[1]}
     *     overrides the output workbook. Missing entries fall back to the built-in paths.
     * @throws java.lang.Exception if the input cannot be read, a record is truncated or does
     *     not match the expected layout, or the workbook cannot be written
     */
    public static void main(String[] args) throws Exception {
        File in = new File(argumentOrDefault(args, 0, DEFAULT_INPUT));
        File out = new File(argumentOrDefault(args, 1, DEFAULT_OUTPUT));

        // The reader is opened before the workbook so an unreadable input fails first.
        try (BufferedReader reader = new BufferedReader(new FileReader(in))) {
            WritableWorkbook wwb = Workbook.createWorkbook(out);
            try {
                WritableSheet dataset = wwb.createSheet("DataSet", 0);
                WritableSheet series = wwb.createSheet("Series", 1);
                wwb.createSheet("Platform", 2);
                wwb.createSheet("Sample", 3);

                for (int col = 0; col < DATASET_HEADERS.length; col++) {
                    dataset.addCell(new Label(col, 0, DATASET_HEADERS[col]));
                }
                for (int col = 0; col < SERIES_HEADERS.length; col++) {
                    series.addCell(new Label(col, 0, SERIES_HEADERS[col]));
                }

                // Row 0 holds the headers, so record N is written to row N.
                int number = 1;
                String[] lines;
                while ((lines = readRecord(reader, number)) != null) {
                    System.out.println(lines[0]);
                    String[] cells = parseRecord(lines, number);
                    for (int col = 0; col < cells.length; col++) {
                        dataset.addCell(new Label(col, number, cells[col]));
                    }
                    number++;
                }
                wwb.write();
            } finally {
                // Always release the output, including when parsing failed part-way.
                wwb.close();
            }
        }
    }

    /** Returns {@code args[index]} when present, otherwise {@code fallback}. */
    private static String argumentOrDefault(String[] args, int index, String fallback) {
        return (args != null && args.length > index) ? args[index] : fallback;
    }

    /**
     * Reads the next record: skips blank separator lines, then collects
     * {@link #LINES_PER_RECORD} lines.
     *
     * @return the record's lines, or {@code null} when the input is exhausted
     * @throws IOException if the input ends in the middle of a record
     */
    private static String[] readRecord(BufferedReader reader, int number) throws IOException {
        String first = reader.readLine();
        while (first != null && first.trim().isEmpty()) {
            first = reader.readLine();
        }
        if (first == null) {
            return null;
        }
        String[] lines = new String[LINES_PER_RECORD];
        lines[0] = first;
        for (int i = 1; i < LINES_PER_RECORD; i++) {
            lines[i] = reader.readLine();
            if (lines[i] == null) {
                throw new IOException("Record " + number + " is truncated: expected "
                        + LINES_PER_RECORD + " lines but the input ended after " + i);
            }
        }
        return lines;
    }

    /**
     * Turns the lines of one record into the cell values of a "DataSet" row, in the order of
     * {@link #DATASET_HEADERS}.
     *
     * @throws IOException if a line does not match the expected layout
     */
    private static String[] parseRecord(String[] lines, int number) throws IOException {
        String[] cells = new String[DATASET_HEADERS.length];
        cells[0] = String.valueOf(number);
        cells[1] = stripOrdinal(lines[0]);
        cells[2] = lines[1];
        // Fixed offsets skip the field label at the start of the line
        // (10, 6 and 14 characters respectively).
        cells[3] = after(lines[2], 10, "Organism", number);
        cells[4] = after(lines[3], 6, "Type", number);
        cells[8] = after(lines[5], 14, "FTP download", number);

        // Platform line: a 10-character label, the platform, then "Series" and "Sample".
        String platformLine = lines[4];
        int seriesAt = platformLine.indexOf("Series");
        int sampleAt = platformLine.indexOf("Sample");
        if (seriesAt < 11 || sampleAt < seriesAt + 8) {
            throw malformed(number, "Platform/Series/Sample", platformLine);
        }
        cells[5] = platformLine.substring(10, seriesAt - 1);
        // Between the 8-character series label and "Sample" sit the series accession(s)
        // followed by the sample count. Splitting at the last whitespace keeps this correct
        // whatever the length of the accession.
        String middle = platformLine.substring(seriesAt + 8, sampleAt).trim();
        int split = lastWhitespace(middle);
        cells[6] = split < 0 ? middle : middle.substring(0, split).trim();
        cells[7] = split < 0 ? "" : middle.substring(split + 1);

        // Accession line: the accession is whatever lies between "Accession:" and "ID:".
        String accessionLine = lines[6];
        int accessionAt = accessionLine.indexOf("Accession:");
        int idAt = accessionAt < 0 ? -1 : accessionLine.indexOf("ID:", accessionAt);
        if (accessionAt < 0 || idAt < 0) {
            throw malformed(number, "Accession/ID", accessionLine);
        }
        cells[9] = accessionLine.substring(accessionAt + "Accession:".length(), idAt).trim();
        cells[10] = accessionLine.substring(idAt + "ID:".length()).trim();
        return cells;
    }

    /**
     * Removes a leading "N. " ordinal of any number of digits from a title line. Lines that
     * do not start with such an ordinal are returned unchanged.
     */
    private static String stripOrdinal(String line) {
        int digits = 0;
        while (digits < line.length() && Character.isDigit(line.charAt(digits))) {
            digits++;
        }
        if (digits > 0 && line.startsWith(". ", digits)) {
            return line.substring(digits + 2);
        }
        return line;
    }

    /** Returns the part of {@code line} from {@code offset} on, or fails with context. */
    private static String after(String line, int offset, String field, int number)
            throws IOException {
        if (line.length() < offset) {
            throw malformed(number, field, line);
        }
        return line.substring(offset);
    }

    /** Returns the index of the last whitespace character in {@code text}, or -1. */
    private static int lastWhitespace(String text) {
        for (int i = text.length() - 1; i >= 0; i--) {
            if (Character.isWhitespace(text.charAt(i))) {
                return i;
            }
        }
        return -1;
    }

    /** Builds the exception reported for a line that does not match the expected layout. */
    private static IOException malformed(int number, String field, String line) {
        return new IOException(
                "Record " + number + ": cannot parse " + field + " from line: " + line);
    }
}