001 /* 002 * Created on Apr 9, 2005 003 * 004 */ 005 package aima.learning.framework; 006 007 /** 008 * @author Ravi Mohan 009 * 010 */ 011 import java.io.BufferedReader; 012 import java.io.InputStreamReader; 013 import java.util.Arrays; 014 import java.util.Hashtable; 015 import java.util.Iterator; 016 import java.util.List; 017 018 import aima.util.Util; 019 020 public class DataSetFactory { 021 022 public DataSet fromFile(String filename, DataSetSpecification spec, 023 String separator) throws Exception { 024 // assumed file in data directory and ends in .csv 025 DataSet ds = new DataSet(spec); 026 BufferedReader reader = new BufferedReader( 027 new InputStreamReader(this.getClass().getResourceAsStream( 028 "../data/" + filename + ".csv"))); 029 String line; 030 while ((line = reader.readLine()) != null) { 031 ds.add(exampleFromString(line, spec, separator)); 032 } 033 034 return ds; 035 036 } 037 038 public static Example exampleFromString(String data, 039 DataSetSpecification dataSetSpec, String separator) { 040 Hashtable<String, Attribute> attributes = new Hashtable<String, Attribute>(); 041 List<String> attributeValues = Arrays.asList(data.split(separator)); 042 if (dataSetSpec.isValid(attributeValues)) { 043 List<String> names = dataSetSpec.getAttributeNames(); 044 Iterator<String> nameiter = names.iterator(); 045 Iterator<String> valueiter = attributeValues.iterator(); 046 while (nameiter.hasNext() && valueiter.hasNext()) { 047 String name = nameiter.next(); 048 AttributeSpecification attributeSpec = dataSetSpec 049 .getAttributeSpecFor(name); 050 Attribute attribute = attributeSpec.createAttribute(valueiter 051 .next()); 052 attributes.put(name, attribute); 053 } 054 String targetAttributeName = dataSetSpec.getTarget(); 055 AttributeSpecification attributeSpec = dataSetSpec 056 .getAttributeSpecFor(targetAttributeName); 057 return new Example(attributes, attributes.get(targetAttributeName)); 058 } else { 059 throw new RuntimeException("Unable to construct Example from " 060 + data); 061 } 062 } 063 064 public static DataSet getRestaurantDataSet() throws Exception { 065 DataSetSpecification spec = createRestaurantDataSetSpec(); 066 return new DataSetFactory().fromFile("restaurant", spec, "\\s+"); 067 } 068 069 public static DataSetSpecification createRestaurantDataSetSpec() { 070 DataSetSpecification dss = new DataSetSpecification(); 071 dss.defineStringAttribute("alternate", Util.yesno()); 072 dss.defineStringAttribute("bar", Util.yesno()); 073 dss.defineStringAttribute("fri/sat", Util.yesno()); 074 dss.defineStringAttribute("hungry", Util.yesno()); 075 dss.defineStringAttribute("patrons", new String[] { "None", "Some", 076 "Full" }); 077 dss.defineStringAttribute("price", new String[] { "$", "$$", "$$$" }); 078 dss.defineStringAttribute("raining", Util.yesno()); 079 dss.defineStringAttribute("reservation", Util.yesno()); 080 dss.defineStringAttribute("type", new String[] { "French", "Italian", 081 "Thai", "Burger" }); 082 dss.defineStringAttribute("wait_estimate", new String[] { "0-10", 083 "10-30", "30-60", ">60" }); 084 dss.defineStringAttribute("will_wait", Util.yesno()); 085 // last attribute is the target attribute unless the target is 086 // explicitly reset with dss.setTarget(name) 087 088 return dss; 089 } 090 091 public static DataSet getIrisDataSet() throws Exception { 092 DataSetSpecification spec = createIrisDataSetSpec(); 093 return new DataSetFactory().fromFile("iris", spec, ","); 094 } 095 096 public static DataSetSpecification createIrisDataSetSpec() { 097 DataSetSpecification dss = new DataSetSpecification(); 098 dss.defineNumericAttribute("sepal_length"); 099 dss.defineNumericAttribute("sepal_width"); 100 dss.defineNumericAttribute("petal_length"); 101 dss.defineNumericAttribute("petal_width"); 102 dss.defineStringAttribute("plant_category", new String[] { "setosa", 103 "versicolor", "virginica" }); 104 return dss; 105 } 106 107 }