HDP 的 Java 代码:一种非参数主题模型,用于提取文章主题。
代码片段和文件信息
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2013-05-07 22:02 IldaGibbs
文件 295 2013-05-07 16:31 IldaGibbs.classpath
文件 6148 2013-05-07 22:02 IldaGibbs.DS_Store
目录 0 2013-05-07 22:02 __MACOSX
目录 0 2013-05-07 22:02 __MACOSXIldaGibbs
文件 82 2013-05-07 22:02 __MACOSXIldaGibbs._.DS_Store
文件 368 2013-05-07 16:31 IldaGibbs.project
目录 0 2013-05-07 16:31 IldaGibbs.settings
文件 587 2013-05-07 16:31 IldaGibbs.settingsorg.eclipse.jdt.core.prefs
目录 0 2013-05-07 19:37 IldaGibbsin
文件 6148 2013-05-07 19:36 IldaGibbsin.DS_Store
目录 0 2013-05-07 16:32 IldaGibbsinorg
目录 0 2013-05-07 16:32 IldaGibbsinorgknowceans
目录 0 2013-05-07 16:33 IldaGibbsinorgknowceanscorpus
文件 4832 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusCorpusResolver.class
文件 4146 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusDisjointDocTerms.class
文件 5595 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusDocument.class
文件 269 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusICorpus.class
文件 512 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusILabelCorpus.class
文件 284 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusISplitCorpus.class
文件 196 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusITermCorpus.class
文件 6398 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusLabelNumCorpus.class
文件 12362 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusNumCorpus.class
文件 1530 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusVisCorpus.class
目录 0 2013-05-07 16:32 IldaGibbsinorgknowceans opics
目录 0 2013-05-07 16:33 IldaGibbsinorgknowceans opicssimple
文件 14261 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleIldaGibbs.class
文件 175 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleISimpleGibbs.class
文件 149 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleISimplePpx.class
文件 187 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleISimpleQueryGibbs.class
文件 8391 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleLdaGibbs.class
............此处省略102个文件信息
/*
* (C) Copyright 2005-2011 Gregor Heinrich (gregor :: arbylon : net)
* (This file is part of the knowceans-ilda experimental software package.)
*/
/*
* knowceans-ilda is free software; you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by the Free
* Software Foundation; either version 3 of the License, or (at your option)
* any later version.
*/
/*
* knowceans-ilda is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
*/
/*
* You should have received a copy of the GNU General Public License along with
* this program; if not, write to the Free Software Foundation, Inc., 59 Temple
* Place, Suite 330, Boston, MA 02111-1307 USA
*/
package org.knowceans.corpus;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
/**
* CorpusResolver resolves indices into names.
*
* @author gregor
*/
public class CorpusResolver {
public static void main(String[] args) {
CorpusResolver cr = new CorpusResolver(“nips/nips“);
System.out.println(cr.getAuthor(2));
System.out.println(cr.getLabel(20));
System.out.println(cr.getDoc(501));
System.out.println(cr.getTerm(1));
System.out.println(cr.getTermId(cr.getTerm(1)));
}
public final String[] EXTENSIONS = { “docs“ “vocab“
“authors.key“ “labels.key“ “vols.key“ “docnames“ };
HashMap termids;
String[][] data = new String[EXTENSIONS.length][];
String filebase;
private boolean parmode;
/**
 * Creates a resolver for the given file base with paragraph mode
 * switched off.
 *
 * @param filebase path prefix shared by the corpus files
 */
public CorpusResolver(String filebase) {
    this(filebase, false);
}
/**
 * Creates a resolver and loads every corpus file that exists for the
 * given file base. Paragraph mode selects a possibly different
 * vocabulary file.
 *
 * @param filebase path prefix; each file is looked up as
 *        {@code filebase + "." + extension}
 * @param parmode when true, the vocabulary is read from
 *        {@code filebase + ".par.vocab"} instead of
 *        {@code filebase + ".vocab"}
 */
public CorpusResolver(String filebase, boolean parmode) {
    this.parmode = parmode;
    this.filebase = filebase;
    for (int i = 0; i < EXTENSIONS.length; i++) {
        String base = filebase;
        // read alternative vocabulary for paragraph mode
        if (parmode && EXTENSIONS[i].equals("vocab")) {
            base += ".par";
        }
        File f = new File(base + "." + EXTENSIONS[i]);
        // missing files are skipped, leaving data[i] null
        if (f.exists()) {
            data[i] = load(f);
        }
    }
}
/**
* Loads the lines of a file, removing all information after a = sign in
* each line.
*
* @param f
* @return array of label strings
*/
private String[] load(File f) {
String[] strings = null;
try {
ArrayList a = new ArrayList();
BufferedReader br = new BufferedReader(
new FileReader(f));
String line = null;
while ((line = br.readLine()) != null) {
line = line.trim();
int ii = line.indexOf(‘=‘);
if (ii > -1) {
属性 大小 日期 时间 名称
----------- --------- ---------- ----- ----
目录 0 2013-05-07 22:02 IldaGibbs
文件 295 2013-05-07 16:31 IldaGibbs.classpath
文件 6148 2013-05-07 22:02 IldaGibbs.DS_Store
目录 0 2013-05-07 22:02 __MACOSX
目录 0 2013-05-07 22:02 __MACOSXIldaGibbs
文件 82 2013-05-07 22:02 __MACOSXIldaGibbs._.DS_Store
文件 368 2013-05-07 16:31 IldaGibbs.project
目录 0 2013-05-07 16:31 IldaGibbs.settings
文件 587 2013-05-07 16:31 IldaGibbs.settingsorg.eclipse.jdt.core.prefs
目录 0 2013-05-07 19:37 IldaGibbsin
文件 6148 2013-05-07 19:36 IldaGibbsin.DS_Store
目录 0 2013-05-07 16:32 IldaGibbsinorg
目录 0 2013-05-07 16:32 IldaGibbsinorgknowceans
目录 0 2013-05-07 16:33 IldaGibbsinorgknowceanscorpus
文件 4832 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusCorpusResolver.class
文件 4146 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusDisjointDocTerms.class
文件 5595 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusDocument.class
文件 269 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusICorpus.class
文件 512 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusILabelCorpus.class
文件 284 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusISplitCorpus.class
文件 196 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusITermCorpus.class
文件 6398 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusLabelNumCorpus.class
文件 12362 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusNumCorpus.class
文件 1530 2013-05-07 19:37 IldaGibbsinorgknowceanscorpusVisCorpus.class
目录 0 2013-05-07 16:32 IldaGibbsinorgknowceans opics
目录 0 2013-05-07 16:33 IldaGibbsinorgknowceans opicssimple
文件 14261 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleIldaGibbs.class
文件 175 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleISimpleGibbs.class
文件 149 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleISimplePpx.class
文件 187 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleISimpleQueryGibbs.class
文件 8391 2013-05-07 19:37 IldaGibbsinorgknowceans opicssimpleLdaGibbs.class
............此处省略102个文件信息
版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件举报,一经查实,本站将立刻删除。
评论列表(条)