知识图谱入门pdf


知识图谱入门pdf
资源截图
代码片段和文件信息
import java.io.*;
import java.util.HashMap;
import java.util.HashSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Created by Dell on 2017/11/3.
 */
public class Assignment4Class3 {

    //任务2中过滤实体用的可选属性
    enum CleanAttr {
        ABSTRACT(“摘要“0) CATEGORY(“类别“1) NAME(“名字“2) SECTION(“宗派“3);

        private String name ;
        private int index ;

        private CleanAttr( String name  int index ){
            this.name = name ;
            this.index = index ;
        }

        public String getName() {
            return name;
        }
        public int getIndex() {
            return index;
        }
    }

    /**********************************************************************
     * 给定字符串与正则表达式,打印所有匹配的子串
     * String str : 带匹配的字符串
     * String regex : 模板(正则表达式)
     **********************************************************************/
    public void extract(String str String regex) {
        Pattern pattern = Pattern.compile(regex);
        Matcher matcher = pattern.matcher(str);
        while (matcher.find()) {
            System.out.println(matcher.group());
        }
    }

    /**********************************************************************
     * 给定知识库的abstract文件与正则表达式,使用正则从abstract中抽取属性值,每抽出一条属性值打印一行abstract一行属性的主语、宾语对
     * String input : 实体abstract属性文件的完整路径
     * String regex : 模板
     **********************************************************************/
    public void extractFromFile(String input String regex) throws IOException {
        Pattern pattern = Pattern.compile(regex);
        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(input) “utf-8“));
        String line = ““;
        while ((line=br.readLine())!=null) {
            String subject = line.split(“/resource/“)[1].split(“> <“)[0];
            String sentence = line.split(“> ““)[1].split(“““)[0];
            Matcher matcher = pattern.matcher(sentence);
            while (matcher.find()) {
                System.out.println(“sentence: “+sentence);
                System.out.println(“relation: “+subject+“ : “+matcher.group());
            }
        }
        br.close();
    }

    /**********************************************************************
     * 给定人工标注文件路径和阈值,被标注为正确实体的次数不小于阈值则被视为正确实体返回,否则被视为错误实体打印出来
     * String path : 人工标注结果文件的路径
     * int threshold : 阈值
     **********************************************************************/
    public HashSet clean(String path int threshold) throws IOException {
        HashSet result = new HashSet<>();
        HashMap countMap = new HashMap<>();
        File[] fileList = new File(path).listFiles();
        for (File file:fileList){
            BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path+file.getName())“utf-8“));
            String line = ““;
            while ((li

 属性            大小     日期    时间   名称
----------- ---------  ---------- -----  ----

     文件        273  2017-11-03 18:41  资料AssignmentAssignment.ideamisc.xml

     文件        260  2017-11-03 18:40  资料AssignmentAssignment.ideamodules.xml

     文件      31996  2017-11-03 22:51  资料AssignmentAssignment.ideaworkspace.xml

     文件        502  2017-11-03 18:42  资料AssignmentAssignmentAssignment.iml

     文件     632610  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment1abstracts.ttl

     文件    1773471  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2abstracts.ttl

     文件     152910  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2aliases.ttl

     文件    2292411  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2categories.ttl

     文件     199701  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2entities.txt

     文件      86476  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2entities_labeled1.txt

     文件     142152  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2entities_labeled2.txt

     文件      69384  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2entities_labeled3.txt

     文件     101518  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2entities_labeled4.txt

     文件     101740  2017-11-03 20:23  资料AssignmentAssignmentoutproductionAssignmentAssignment2sections.ttl

     文件       1524  2017-11-03 22:51  资料AssignmentAssignmentoutproductionAssignmentAssignment4Class3$CleanAttr.class

     文件       6349  2017-11-03 22:51  资料AssignmentAssignmentoutproductionAssignmentAssignment4Class3.class

     文件       3073  2017-11-03 22:48  资料AssignmentAssignment\readme.md

     文件     632610  2017-08-09 19:16  资料AssignmentAssignment\resourceAssignment1abstracts.ttl

     文件    1773471  2017-08-14 10:56  资料AssignmentAssignment\resourceAssignment2abstracts.ttl

     文件     152910  2017-10-31 18:07  资料AssignmentAssignment\resourceAssignment2aliases.ttl

     文件    2292411  2017-08-14 10:58  资料AssignmentAssignment\resourceAssignment2categories.ttl

     文件     199701  2017-08-14 10:37  资料AssignmentAssignment\resourceAssignment2entities.txt

     文件      86476  2017-08-14 10:38  资料AssignmentAssignment\resourceAssignment2entities_labeled1.txt

     文件     142152  2017-08-14 10:39  资料AssignmentAssignment\resourceAssignment2entities_labeled2.txt

     文件      69384  2017-08-14 10:39  资料AssignmentAssignment\resourceAssignment2entities_labeled3.txt

     文件     101518  2017-08-14 10:39  资料AssignmentAssignment\resourceAssignment2entities_labeled4.txt

     文件     101740  2017-10-31 18:56  资料AssignmentAssignment\resourceAssignment2sections.ttl

     文件       8455  2017-11-03 22:50  资料AssignmentAssignmentsrcAssignment4Class3.java

     文件        182  2017-11-03 20:25  资料Assignment\__MACOSX._Assignment

     文件        182  2017-11-03 18:41  资料Assignment\__MACOSXAssignment.idea._misc.xml

............此处省略405个文件信息

版权声明:本文内容由互联网用户自发贡献,该文观点仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌抄袭侵权/违法违规的内容, 请发送邮件举报,一经查实,本站将立刻删除。

发表评论

评论列表(条)