// User:Dan Polansky/CFI.java
//
// From Wiktionary, the free dictionary.
class CFI {
  /** Determines whether a term should be included in English
      Wiktionary in a given sense. */
  boolean shouldBeIncluded(String term, String sense) {    
    return // Is attested, and
           isAttested(term, sense) &&
           // If it is from fictional universe, then it meets its criteria, and
           (!fromFictionalUniverse(term, sense) || meetsFictionalUniverseCriteria(term, sense)) &&
           // If it is a proper name, then it meets specific citeria for proper names, and
           (!isProperName(term,sense) || shouldProperNameBeIncludedGivenAttestation(term, sense)) &&
           // If it is not a proper name, then it is not a semantic sum of parts
           (isProperName(term,sense) || !isSemanticSumOfParts(term, sense));
  }

  /** Determines whether the term is attested in a given sense.
      Four criteria of attestation are considered. */
  boolean isAttested(String term, String sense) {
    return isInWidespreadUse(term, sense) ||
           isInOneWellknownWork(term, sense) ||
           hasThreeSuitableQuotations(term, sense) ||
           ( getLanguage(term).isExtict() && hasOneContemporaneousQuotation(term, sense));
    // The fourth condition is broken; how would one know the language merely from the term?
    // The language would have to be passed as an argument.
  }

  /** Determines whether the term has three suitable quotations
      in a given sense, spanning at least a year. */
  boolean hasThreeSuitableQuotations(term, sense) {
     // The current implementation is extremely resource-wasting. To be reimplemented
     // in such a way that humans can conveniently follow the method.

     List allQuotations = Corpus.getAllQuotations(term); // A call of a super-mighty method    
     List candidateQuotations = new ArrayList();

     // Filter all quotations by the requirements of being durably archived and used rather than mentioned
     for (i=allQuotations.iterator(); i.hasNext();) {
       String quotation = (String)i.next();
       if (isFromDurablyArchivedSource(quotation) && // Broken; a string alone does not tell its source.
           isUseRatherThanMention(quotation, term, sense))
         candidateQuotations.add(quotation)
     }

     // Remove dependent quotations
     for (i=candidateQuotations.iterator(); i.hasNext();) {
       String quotation = (String)i.next();
       for (j=candidateQuotations.iterator(); j.hasNext();) {
         String quotation2 = (String)j.next();
         if (quotation2.equals(quotation1))
           break; // Loop only up to the same quotation, checking only the previous ones.
         if (isDependentQuotation(quotation, quotation2)) {
           candidateQuotations.remove(quotation);
           break; }}}

     // Determine time span
     if (getTimeSpanInYears(candidateQuotations) < 1.0)
       return false;

     return candidateQuotations.size() >= 3; // At least three suitable quotations
  }

  /** Determines whether the term in the given sense is a semantic
      sum of parts. Examples of sums of parts include "brown leaf". */
  boolean isSemanticSumOfParts(String term, String sense) {
    // To be implemented
    if (hasNoSpaceAndNoHyphen(term)) {
      return true; //This may be controversial for German.
    } else {
      // It is a multi-word term.
      return true; // Dummy return value
    }
  }

  /** Determines whether a proper name should be included in a given sense given
      it is attested. */  
  boolean shouldProperNameBeIncludedGivenAttestation(String term, String sense) {
    // Specific cases
    if (isGivenName(term, sense) || isSurname(term, sense) || isPatronymic(term, sense))
       return true;
    if (isBrandOfPhysicalProduct(term, sense))
       return shouldBrandOfPhysicalProductBeIncluded(term, sense);
    // The general case
    return true; // Dummy return value; the general case is unimplemented for lacking consensus.
  }

  /** Determines whether a brand of physical product should be included using
      seven requirements on attesting quotations. */
  boolean shouldBrandOfPhysicalProductBeIncluded(term, sense) {
     return false; // Dummy return value; complex criteria that largely lead to exclusion.
  }
}
  /*
    Known issues:
    * Constructed languages are not covered.
    * Company names are not covered (I don't care).
    * Language and part of speech probably need to be passed as arguments to
      all methods; for simplicity's sake, I have omitted them.
  */