001    package aima.learning.knowledge;
002    
003    import java.util.ArrayList;
004    import java.util.HashMap;
005    import java.util.List;
006    import java.util.Map;
007    import java.util.regex.Pattern;
008    
009    import aima.learning.framework.DataSetSpecification;
010    import aima.logic.fol.domain.FOLDomain;
011    
012    /**
013     * @author Ciaran O'Reilly
014     * 
015     */
016    public class FOLDataSetDomain extends FOLDomain {
017            //
018            private static Pattern allowableCharactersRegEx = Pattern.compile("[^a-zA-Z_$0-9]");
019            //
020            private DataSetSpecification dataSetSpecification;
021            private String trueGoalValue = null;
022            // Default example prefix, see pg679 of AIMA
023            private String examplePrefix = "X"; 
024            private List<String> descriptionPredicateNames = new ArrayList<String>();
025            private List<String> descriptionDataSetNames = new ArrayList<String>();
026            private Map<String, String> dsToFOLNameMap = new HashMap<String, String>();
027            
028            //
029            // PUBLIC METHODS
030            //
031            public FOLDataSetDomain(DataSetSpecification dataSetSpecification, String trueGoalValue) {
032                    this.dataSetSpecification = dataSetSpecification;
033                    this.trueGoalValue = trueGoalValue;
034                    constructFOLDomain();
035            }
036            
037            public String getDataSetTargetName() {
038                    return dataSetSpecification.getTarget();
039            }
040            
041            public String getGoalPredicateName() {
042                    return getFOLName(dataSetSpecification.getTarget());
043            }
044            
045            public String getTrueGoalValue() {
046                    return trueGoalValue;
047            }
048            
049            public List<String> getDescriptionPredicateNames() {
050                    return descriptionPredicateNames;
051            }
052            
053            public List<String> getDescriptionDataSetNames() {
054                    return descriptionDataSetNames;
055            }
056            
057            public boolean isMultivalued(String descriptiveDataSetName) {
058                    List<String> possibleValues = dataSetSpecification.getPossibleAttributeValues(descriptiveDataSetName);
059                    // If more than two possible values
060                    // then is multivalued
061                    if (possibleValues.size() > 2) {
062                            return true;
063                    }
064                    // If one of the possible values for the attribute
065                    // matches the true goal value then consider
066                    // it not being multivalued.
067                    for (String pv : possibleValues) {
068                            if (trueGoalValue.equals(pv)) {
069                                    return false;
070                            }
071                    }
072                    
073                    return true;
074            }
075            
076            public String getExampleConstant(int egNo) {
077                    String egConstant = examplePrefix+egNo;
078                    addConstant(egConstant);
079                    return egConstant;
080            }
081            
082            public String getFOLName(String dsName) {
083                    String folName = dsToFOLNameMap.get(dsName);
084                    if (null == folName) {
085                            folName = dsName;
086                            if (!Character.isJavaIdentifierStart(dsName.charAt(0))) {
087                                    folName = "_"+dsName;
088                            }
089                            folName = allowableCharactersRegEx.matcher(folName).replaceAll("_");
090                            dsToFOLNameMap.put(dsName, folName);
091                    }
092                    
093                    return folName;
094            }
095            
096            //
097            // PRIVATE METHODS
098            //
099            private void constructFOLDomain() {
100                    // Ensure the target predicate is included
101                    addPredicate(getFOLName(dataSetSpecification.getTarget()));
102                    // Create the descriptive predicates
103                    for (String saName : dataSetSpecification.getNamesOfStringAttributes()) {                       
104                            if (dataSetSpecification.getTarget().equals(saName)) {
105                                    // Don't add the target to the descriptive predicates
106                                    continue;
107                            }
108                            String folSAName = getFOLName(saName);
109                            // Add a predicate for the attribute
110                            addPredicate(folSAName);
111                            
112                            descriptionPredicateNames.add(folSAName);
113                            descriptionDataSetNames.add(saName);
114                            
115                            List<String> attributeValues = dataSetSpecification.getPossibleAttributeValues(saName);
116                            // If a multivalued attribute need to setup
117                            // Constants for the different possible values
118                            if (isMultivalued(saName)) {
119                                    for (String av : attributeValues) {
120                                            addConstant(getFOLName(av));
121                                    }
122                            }
123                    }
124            }
125    }