001    /*
002     * Created on Apr 9, 2005
003     *
004     */
005    package aima.learning.framework;
006    
007    /**
008     * @author Ravi Mohan
009     * 
010     */
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;

import aima.util.Util;
019    
020    public class DataSetFactory {
021    
022            public DataSet fromFile(String filename, DataSetSpecification spec,
023                            String separator) throws Exception {
024                    // assumed file in data directory and ends in .csv
025                    DataSet ds = new DataSet(spec);
026                    BufferedReader reader = new BufferedReader(
027                                    new InputStreamReader(this.getClass().getResourceAsStream(
028                                                    "../data/" + filename + ".csv")));
029                    String line;
030                    while ((line = reader.readLine()) != null) {
031                            ds.add(exampleFromString(line, spec, separator));
032                    }
033    
034                    return ds;
035    
036            }
037    
038            public static Example exampleFromString(String data,
039                            DataSetSpecification dataSetSpec, String separator) {
040                    Hashtable<String, Attribute> attributes = new Hashtable<String, Attribute>();
041                    List<String> attributeValues = Arrays.asList(data.split(separator));
042                    if (dataSetSpec.isValid(attributeValues)) {
043                            List<String> names = dataSetSpec.getAttributeNames();
044                            Iterator<String> nameiter = names.iterator();
045                            Iterator<String> valueiter = attributeValues.iterator();
046                            while (nameiter.hasNext() && valueiter.hasNext()) {
047                                    String name = nameiter.next();
048                                    AttributeSpecification attributeSpec = dataSetSpec
049                                                    .getAttributeSpecFor(name);
050                                    Attribute attribute = attributeSpec.createAttribute(valueiter
051                                                    .next());
052                                    attributes.put(name, attribute);
053                            }
054                            String targetAttributeName = dataSetSpec.getTarget();
055                            AttributeSpecification attributeSpec = dataSetSpec
056                                            .getAttributeSpecFor(targetAttributeName);
057                            return new Example(attributes, attributes.get(targetAttributeName));
058                    } else {
059                            throw new RuntimeException("Unable to construct Example from "
060                                            + data);
061                    }
062            }
063    
064            public static DataSet getRestaurantDataSet() throws Exception {
065                    DataSetSpecification spec = createRestaurantDataSetSpec();
066                    return new DataSetFactory().fromFile("restaurant", spec, "\\s+");
067            }
068    
069            public static DataSetSpecification createRestaurantDataSetSpec() {
070                    DataSetSpecification dss = new DataSetSpecification();
071                    dss.defineStringAttribute("alternate", Util.yesno());
072                    dss.defineStringAttribute("bar", Util.yesno());
073                    dss.defineStringAttribute("fri/sat", Util.yesno());
074                    dss.defineStringAttribute("hungry", Util.yesno());
075                    dss.defineStringAttribute("patrons", new String[] { "None", "Some",
076                                    "Full" });
077                    dss.defineStringAttribute("price", new String[] { "$", "$$", "$$$" });
078                    dss.defineStringAttribute("raining", Util.yesno());
079                    dss.defineStringAttribute("reservation", Util.yesno());
080                    dss.defineStringAttribute("type", new String[] { "French", "Italian",
081                                    "Thai", "Burger" });
082                    dss.defineStringAttribute("wait_estimate", new String[] { "0-10",
083                                    "10-30", "30-60", ">60" });
084                    dss.defineStringAttribute("will_wait", Util.yesno());
085                    // last attribute is the target attribute unless the target is
086                    // explicitly reset with dss.setTarget(name)
087    
088                    return dss;
089            }
090    
091            public static DataSet getIrisDataSet() throws Exception {
092                    DataSetSpecification spec = createIrisDataSetSpec();
093                    return new DataSetFactory().fromFile("iris", spec, ",");
094            }
095    
096            public static DataSetSpecification createIrisDataSetSpec() {
097                    DataSetSpecification dss = new DataSetSpecification();
098                    dss.defineNumericAttribute("sepal_length");
099                    dss.defineNumericAttribute("sepal_width");
100                    dss.defineNumericAttribute("petal_length");
101                    dss.defineNumericAttribute("petal_width");
102                    dss.defineStringAttribute("plant_category", new String[] { "setosa",
103                                    "versicolor", "virginica" });
104                    return dss;
105            }
106    
107    }