kr.ac.kaist.swrc.jhannanum.share
Class TagSet

java.lang.Object
  extended by kr.ac.kaist.swrc.jhannanum.share.TagSet

public class TagSet
extends java.lang.Object

Morpheme tag set.

Author:
Sangwon Park (hudoni@world.kaist.ac.kr), CILab, SWRC, KAIST

Field Summary
 java.lang.String author
          the author of tag set
 java.lang.String copyright
          the copyright of tag set
 java.lang.String date
          the last update date of tag set
 java.lang.String editor
          the editor of the tag set
 int[] indexTags
          the list of index tags
 int IRR_TYPE_B
          'ㅂ' irregular
 int IRR_TYPE_D
          'ㄷ' irregular
 int IRR_TYPE_H
          'ㅎ' irregular
 int IRR_TYPE_REO
          '러' irregular
 int IRR_TYPE_REU
          '르' irregular
 int IRR_TYPE_S
          'ㅅ' irregular
private  java.util.ArrayList<java.lang.String> irregularList
          the irregular rule list
 int iwgTag
          the start tag
 int numTag
          the number tag
static int PHONEME_TYPE_ALL
          phoneme type - all
static int TAG_SET_KAIST
          KAIST tag set
static int TAG_TYPE_ADJS
          tag type - adjective
static int TAG_TYPE_ALL
          tag type - all
static int TAG_TYPE_COUNT
          the number of tag types
static int TAG_TYPE_EOMIES
          tag type - eomi(ending)
static int TAG_TYPE_JOSA
          tag type - josa(particle)
static int TAG_TYPE_JP
          tag type - predicative particle
static int TAG_TYPE_NBNP
          tag type - bound noun
static int TAG_TYPE_NOUNS
          tag type - noun
static int TAG_TYPE_NPS
          tag type - pronoun
static int TAG_TYPE_VERBS
          tag type - verb
static int TAG_TYPE_YONGS
          tag type - yongeon(verb, adjective)
private  java.util.ArrayList<java.lang.String> tagList
          the morpheme tag list
private  java.util.HashMap<java.lang.String,int[]> tagSetMap
          the hash map for the group of tags
private  int[][] tagTypeTable
          the table for tag types
 java.lang.String title
          the name of tag set
 int unkTag
          the unknown tag
 int[] unkTags
          the list of unknown tags
 java.lang.String version
          the version of tag set
 
Constructor Summary
TagSet()
          Constructor.
 
Method Summary
 boolean checkPhonemeType(int phonemeType, int phoneme)
          Checks the phoneme type.
 boolean checkTagType(int tagType, int tag)
          Checks the morpheme tag type.
 void clear()
          Clears the loaded data.
 int getIrregularID(java.lang.String irregular)
          Returns the ID of the specified irregular rule.
 java.lang.String getIrregularName(int irregularID)
          Returns the name of the irregular rule for the specified ID.
 int getTagCount()
          Returns the number of morpheme tags loaded.
 int getTagID(java.lang.String tag)
          Returns the ID of the morpheme tag.
 java.lang.String getTagName(int tagID)
          Returns the tag name for the specified ID.
 int[] getTags(java.lang.String tagSetName)
          Returns the morpheme tags in the specified tag group.
 void init(java.lang.String filePath, int tagSetFlag)
          Reads the tag set file, and initializes the object.
 void setTagTypes(int tagSetFlag)
          Sets the tag types.
 
Methods inherited from class java.lang.Object
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
 

Field Detail

TAG_SET_KAIST

public static final int TAG_SET_KAIST
KAIST tag set

See Also:
Constant Field Values

TAG_TYPE_ALL

public static final int TAG_TYPE_ALL
tag type - all

See Also:
Constant Field Values

TAG_TYPE_VERBS

public static final int TAG_TYPE_VERBS
tag type - verb

See Also:
Constant Field Values

TAG_TYPE_NOUNS

public static final int TAG_TYPE_NOUNS
tag type - noun

See Also:
Constant Field Values

TAG_TYPE_NPS

public static final int TAG_TYPE_NPS
tag type - pronoun

See Also:
Constant Field Values

TAG_TYPE_ADJS

public static final int TAG_TYPE_ADJS
tag type - adjective

See Also:
Constant Field Values

TAG_TYPE_NBNP

public static final int TAG_TYPE_NBNP
tag type - bound noun

See Also:
Constant Field Values

TAG_TYPE_JOSA

public static final int TAG_TYPE_JOSA
tag type - josa(particle)

See Also:
Constant Field Values

TAG_TYPE_YONGS

public static final int TAG_TYPE_YONGS
tag type - yongeon(verb, adjective)

See Also:
Constant Field Values

TAG_TYPE_EOMIES

public static final int TAG_TYPE_EOMIES
tag type - eomi(ending)

See Also:
Constant Field Values

TAG_TYPE_JP

public static final int TAG_TYPE_JP
tag type - predicative particle

See Also:
Constant Field Values

TAG_TYPE_COUNT

public static final int TAG_TYPE_COUNT
the number of tag types

See Also:
Constant Field Values

PHONEME_TYPE_ALL

public static final int PHONEME_TYPE_ALL
phoneme type - all

See Also:
Constant Field Values

title

public java.lang.String title
the name of tag set


version

public java.lang.String version
the version of tag set


copyright

public java.lang.String copyright
the copyright of tag set


author

public java.lang.String author
the author of tag set


date

public java.lang.String date
the last update date of tag set


editor

public java.lang.String editor
the editor of the tag set


tagList

private java.util.ArrayList<java.lang.String> tagList
the morpheme tag list


irregularList

private java.util.ArrayList<java.lang.String> irregularList
the irregular rule list


tagSetMap

private java.util.HashMap<java.lang.String,int[]> tagSetMap
the hash map for the group of tags


tagTypeTable

private int[][] tagTypeTable
the table for tag types


indexTags

public int[] indexTags
the list of index tags


unkTags

public int[] unkTags
the list of unknown tags


iwgTag

public int iwgTag
the start tag


unkTag

public int unkTag
the unknown tag


numTag

public int numTag
the number tag


IRR_TYPE_B

public int IRR_TYPE_B
'ㅂ' irregular


IRR_TYPE_S

public int IRR_TYPE_S
'ㅅ' irregular


IRR_TYPE_D

public int IRR_TYPE_D
'ㄷ' irregular


IRR_TYPE_H

public int IRR_TYPE_H
'ㅎ' irregular


IRR_TYPE_REU

public int IRR_TYPE_REU
'르' irregular


IRR_TYPE_REO

public int IRR_TYPE_REO
'러' irregular

Constructor Detail

TagSet

public TagSet()
Constructor.

Method Detail

checkPhonemeType

public boolean checkPhonemeType(int phonemeType,
                                int phoneme)
Checks the phoneme type.

Parameters:
phonemeType - phoneme type
phoneme - phoneme
Returns:
true if the phoneme belongs to the specified type, false otherwise

checkTagType

public boolean checkTagType(int tagType,
                            int tag)
Checks the morpheme tag type.

Parameters:
tagType - tag type
tag - morpheme tag
Returns:
true if the morpheme tag belongs to the specified type, false otherwise

clear

public void clear()
Clears the loaded data.


getIrregularID

public int getIrregularID(java.lang.String irregular)
Returns the ID of the specified irregular rule.

Parameters:
irregular - irregular rule
Returns:
the ID of the irregular rule

getIrregularName

public java.lang.String getIrregularName(int irregularID)
Returns the name of the irregular rule for the specified ID.

Parameters:
irregularID - the ID of the irregular rule
Returns:
the name of the irregular rule

getTagCount

public int getTagCount()
Returns the number of morpheme tags loaded.

Returns:
the number of morpheme tags loaded

getTagID

public int getTagID(java.lang.String tag)
Returns the ID of the morpheme tag.

Parameters:
tag - morpheme tag
Returns:
the tag ID, or -1 if the tag set does not contain the tag

getTagName

public java.lang.String getTagName(int tagID)
Returns the tag name for the specified ID.

Parameters:
tagID - the morpheme tag ID
Returns:
the tag name

getTags

public int[] getTags(java.lang.String tagSetName)
Returns the morpheme tags in the specified tag group.

Parameters:
tagSetName - the name of the tag group
Returns:
the list of tag IDs

init

public void init(java.lang.String filePath,
                 int tagSetFlag)
          throws java.io.IOException
Reads the tag set file, and initializes the object.

Parameters:
filePath - the path of the morpheme tag set file
tagSetFlag - the flag for the tag set (e.g. TAG_SET_KAIST)
Throws:
java.io.IOException

setTagTypes

public void setTagTypes(int tagSetFlag)
Sets the tag types.

Parameters:
tagSetFlag - the flag for the tag set (e.g. TAG_SET_KAIST)