/** * Authors: Frederik Leyvraz, David Degenhardt * License: GNU General Public License v3.0 only * Version: 1.0.0 */ package ch.bfh.ti.latexindexer; import java.util.*; public class Word { private final String value; private int frequency; private Word superVariant; private Set subVariants; private Set variations; /** * Constructor. * @param value The 'content' of the word. */ public Word(String value) { this.value = value; this.frequency = 1; subVariants = new HashSet<>(); variations = new HashSet<>(); variations.add(value); variations.add(value.toLowerCase()); } /** * Constructor * @param value The 'content' of the word. * @param frequency The frequency with which the word occurs in the text. */ public Word(String value, int frequency) { this.value = value; this.frequency = frequency; subVariants = new HashSet<>(); variations = new HashSet<>(); variations.add(value); variations.add(value.toLowerCase()); } /** * Defines another word to be the super-variant of this word. In other words; lets you define this word to be a sub-variant of another. * @param parent The word that is this words super-variant. */ public void setSuperVariant(Word parent){ if (superVariant != null) { superVariant.removeSubVariant(this); } checkSubVariantCycles(parent); this.superVariant = parent; parent.setSubVariant(this); } /** * A helper method for setSuperVariant(). * @param child A word that is a sub-variant of this word. */ private void setSubVariant(Word child){ this.subVariants.add(child); } /** * A helper method for removing sub-variants. * @param child The sub-variant that should be removed from this word. */ private void removeSubVariant(Word child){ subVariants.remove(child); } /** * A helper method that makes surr that there are no cycles in the sub-/super-variants. * @param child The word for which we want to make sure that it is not found in the super-variant chain of this word. */ private void checkSubVariantCycles(Word child){ for (Word w : subVariants){ if (w.equals(child)){ w.superVariant.removeSubVariant(w); w.superVariant = null; break; } w.checkSubVariantCycles(child); } } /** * Defines other words to be variations of this word. * @param variation The word that is a variation of this word. */ public void setVariation(String variation){ variations.add(variation); } /** * @return The frequency with which the word appears in the document. */ public int getFrequency() { return this.frequency; } /** * @return The 'content' of this word. */ public String getValue() { return this.value; } /** * @return The variations of this word present in the document. */ public Set getVariations() { return this.variations; } /** * Makes sure that the \index{} macro contains the right information (concerning sub-variants). * @return The string that should be input in the curly braces of the \index{} macro, so that sub-variants are correctly indexed. */ public String getIndexString() { return this.superVariant != null ? this.superVariant.getIndexString() + "!" + this.value : this.value; } /** * Wraps the index string in the \index{} macro. * @return The correct \index{} macro for this word. */ public String getWrappedIndexString(){ String result = "\\index{"; result = result + getIndexString(); result = result + "}"; return result; } @Override public boolean equals(Object obj) { if (obj == null || obj.getClass() != this.getClass()){ return false; } if (((Word) obj).getValue().equals(this.value)){ return true; } return false; } /** * A comparator that compares words by frequency. */ public static class FrequencyComparator implements Comparator { @Override public int compare(Word w1, Word w2) { if (w1.getFrequency() > w2.getFrequency()) { return -1; } else if (w1.getFrequency() == w2.getFrequency()) { return 0; } else { return 1; } } } /** * A comparator that compares words lexicographically. */ public static class AlphabeticalComparator implements Comparator { @Override public int compare(Word w1, Word w2) { return w1.getValue().compareToIgnoreCase(w2.getValue()); } } }