/*
 * Tsv v0.5
 * 2009 (c) Werner Van Belle
 * Distributed under the GPL2
 *------------------------------
 * This is a small library to read in tab separated value files.
 * It features
 *  - automatic creation of indices to speed up data lookups
 *  - retrieval of key collections given a set of attributes
 *  - retrieval of data groups belonging to a key
 *  - subtable views
 */
using System;
using System.Collections.Generic;
using System.Collections;
using System.Text;
using System.IO;

/**
 * A record maps attribute names onto comparable values. Records are
 * compared attribute by attribute, which makes them usable as keys
 * in the indices built by Index and Table.
 */
class Record: IComparable
{
    Dictionary<String,IComparable> keyval;

    /** Creates a shallow copy of copyfrom: same attributes, same value objects. */
    public Record(Record copyfrom)
    {
        keyval=new Dictionary<String,IComparable>(copyfrom.keyval);
    }

    /** Returns a shallow copy of this record. */
    public Record copy()
    {
        return new Record(this);
    }

    /** Creates a record without attributes. */
    public Record()
    {
        keyval=new Dictionary<String,IComparable>();
    }

    /**
     * Reads or writes the value of one attribute. Reading an attribute
     * that was never defined throws a KeyNotFoundException.
     */
    public IComparable this [String key]
    {
        set { keyval[key]=value; }
        get { return keyval[key]; }
    }

    /** Sets attribute key to val, replacing any previous value. */
    public void define(String key, IComparable val)
    {
        keyval[key]=val;
    }

    /** Removes an attribute; does nothing when it is not present. */
    public void Remove(String attribute)
    {
        keyval.Remove(attribute);
    }

    /** Number of attributes in this record. */
    public int count()
    {
        return keyval.Count;
    }

    /** Returns a header that lists the attribute names of this record. */
    public Header header()
    {
        String[] names=new String[keyval.Count];
        keyval.Keys.CopyTo(names,0);
        return new Header(names);
    }

    /** Compares two attribute values; a null value sorts before any non-null value. */
    static int compare_values(IComparable a, IComparable b)
    {
        if (a==null) return b==null ? 0 : -1;
        return a.CompareTo(b);
    }

    /**
     * Compares A against B.
     * Returns 2 when A has an attribute that B lacks, -2 when B has an
     * attribute that A lacks (both cases are also reported on the console),
     * otherwise -1, 0 or 1 according to the first attribute of A whose
     * values differ.
     */
    static int internal_compareto(Record A, Record B)
    {
        // making sure that everything is available in both of them
        foreach(KeyValuePair<String,IComparable> kv in A.keyval)
        {
            if (!B.keyval.ContainsKey(kv.Key))
            {
                Console.Out.WriteLine("Missing attribute in comparison " + kv.Key);
                return 2;
            }
        }
        foreach(KeyValuePair<String,IComparable> kv in B.keyval)
        {
            if (!A.keyval.ContainsKey(kv.Key))
            {
                Console.Out.WriteLine("Missing attriobute in comparison" + kv.Key);
                return -2;
            }
        }
        foreach(KeyValuePair<String,IComparable> kv in A.keyval)
        {
            // CompareTo only promises a sign, not the values -1 and 1,
            // so the result is tested with < and > instead of ==.
            int c=compare_values(kv.Value,B.keyval[kv.Key]);
            if (c<0) return -1;
            if (c>0) return 1;
        }
        return 0;
    }

    /**
     * IComparable implementation. A null argument sorts before every
     * record; an argument that is not a Record causes an
     * InvalidCastException.
     */
    public int CompareTo(object obj)
    {
        if (obj==null) return 1;
        Record o=(Record)obj;
        return internal_compareto(this,o);
    }

    /** Renders every attribute as "name: value" followed by a tab. */
    public override string ToString()
    {
        StringBuilder result=new StringBuilder();
        foreach(KeyValuePair<String,IComparable> attribute in keyval)
            result.Append(attribute.Key).Append(": ").Append(attribute.Value).Append("\t");
        return result.ToString();
    }

    /**
     * Hash over all attributes. The entries are summed so the result does
     * not depend on enumeration order, and key and value are mixed
     * asymmetrically so that an entry whose key equals its value (as in
     * every Header) does not cancel itself out.
     */
    public override int GetHashCode ()
    {
        int result=0;
        foreach(KeyValuePair<String,IComparable> kv in keyval)
        {
            int value_hash=kv.Value==null ? 0 : kv.Value.GetHashCode();
            unchecked
            {
                result+=(kv.Key.GetHashCode()*397)^value_hash;
            }
        }
        return result;
    }

    /** True when obj is a Record that compares equal to this one. */
    public override bool Equals(object obj)
    {
        Record other=obj as Record;
        // cast to object: a plain == would recurse into the overloaded operator
        if ((object)other==null) return false;
        return internal_compareto(this,other)==0;
    }

    static public bool operator <(Record A, Record B)
    {
        return A.CompareTo(B)<0;
    }

    static public bool operator >(Record A, Record B)
    {
        return A.CompareTo(B)>0;
    }

    static public bool operator >=(Record A, Record B)
    {
        return A.CompareTo(B)>=0;
    }

    static public bool operator <=(Record A, Record B)
    {
        return A.CompareTo(B)<=0;
    }

    /** Value equality; two null references are equal, null never equals a record. */
    static public bool operator ==(Record A, Record B)
    {
        if (ReferenceEquals(A,B)) return true;
        if ((object)A==null || (object)B==null) return false;
        return internal_compareto(A,B)==0;
    }

    static public bool operator !=(Record A, Record B)
    {
        return !(A==B);
    }
};

/**
 * An ordered list of attribute names. Every name is also stored as an
 * attribute of the underlying record (mapped onto itself), so headers
 * can be compared and hashed like any other record.
 */
class Header: Record
{
    public String[] content;

    /** Creates a header for the given attribute names; the array is kept, not copied. */
    public Header(String[] c)
    {
        foreach(String str in c)
            define(str,str);
        content=c;
    }

    /** Returns the name of the i-th attribute. */
    public String at(int i)
    {
        return content[i];
    }

    /** Renders the header as "(a, b, c)". */
    public override string ToString()
    {
        return "(" + String.Join(", ",content) + ")";
    }
}

/**
 * Groups records by the attributes named in a header. Each distinct
 * combination of values is a key that maps onto the records having it.
 */
class Index
{
    public Dictionary<Record,List<Record> > idx;
    Header header;

    /** Returns all keys currently present in the index. */
    public List<Record> keys()
    {
        return new List<Record>(idx.Keys);
    }

    /**
     * Files record under the key formed by its values for the header
     * attributes. Throws a KeyNotFoundException when the record lacks
     * one of those attributes.
     */
    public void Add(Record record)
    {
        Record trimmed_down=new Record();
        foreach(String attribute in header.content)
            trimmed_down.define(attribute,record[attribute]);
        List<Record> elements;
        if (!idx.TryGetValue(trimmed_down,out elements))
        {
            elements=new List<Record>();
            idx[trimmed_down]=elements;
        }
        elements.Add(record);
    }

    /** Builds an index over records, keyed on the attributes listed in hr. */
    public Index(Header hr, List<Record> records)
    {
        header=hr;
        idx=new Dictionary<Record,List<Record> >();
        foreach(Record record in records)
            Add(record);
    }

    /**
     * Returns the records filed under key. Throws a KeyNotFoundException
     * carrying the textual form of the key when there are none.
     */
    public List<Record> values(Record key)
    {
        List<Record> found;
        if (idx.TryGetValue(key,out found))
            return found;
        throw new KeyNotFoundException(key.ToString());
    }
}

class Table: IEnumerable
{
    /**
     * The key keeps track of the parent key that lead to this subtable, if it is a subtable
     * If there is no parent table then the key remains empty
     */
    public Record key;
    protected List<Record> records;
    Dictionary<Header,Index> indices;

    /** Number of records in this table. */
    public int count()
    {
        return records.Count;
    }

    /**
     * Appends one record per data line of the tab separated file. The
     * first line provides the attribute names; all values are stored as
     * strings. An empty file yields no records.
     */
    void read_tsv(String filename)
    {
        using (StreamReader reader=new StreamReader(filename))
        {
            // first obtain the names of the columns
            String line=reader.ReadLine();
            if (line==null) return;
            String[] column_names=line.Split('\t');
            // read the remaining records
            while ((line=reader.ReadLine())!=null)
            {
                String[] row=line.Split('\t');
                Record record=new Record();
                // Cells beyond the last column name are ignored; columns
                // missing from a short row stay undefined in the record.
                // NOTE(review): confirm this is the intended handling of ragged rows.
                for(int x = 0 ; x < column_names.Length && x < row.Length; x++)
                    record.define(column_names[x],row[x]);
                records.Add(record);
            }
        }
    }

    /** Creates an empty table with an empty key. */
    public Table()
    {
        key = new Record();
        records=new List<Record>();
        indices=new Dictionary<Header,Index>();
    }

    /**
     * Creates a table which will be filled with the content
     * of the tab separated file 'filename'. The first row of
     * that file must contain the attributes.
     */
    public Table(String filename)
    {
        key = new Record();
        records=new List<Record>();
        indices=new Dictionary<Header,Index>();
        read_tsv(filename);
        Console.Out.WriteLine("Read " + records.Count + " records from " + filename);
    }

    /**
     * Creates a table that is a subtable of another table. We also
     * call these groups. The subtable (the one we are constructing
     * here) will know what the parent key was (can be obtained by
     * referring to key) and will have a list of all records that
     * belonged to that group. Normally this constructor should not
     * be called directly. Use the group or groups methods.
     */
    public Table(Record k, List<Record> init)
    {
        key=k;
        records=new List<Record>(init);
        indices=new Dictionary<Header,Index>();
    }

    /**
     * When iterating over a table each record will be
     * returned one by one
     */
    IEnumerator IEnumerable.GetEnumerator()
    {
        foreach(Record record in records)
            yield return record;
    }

    /**
     * Returns a list that contains all subtables based on
     * group identification given by header.
     */
    public List<Table> groups(Header header)
    {
        Index index=find_index(header);
        List<Table> result=new List<Table>(index.idx.Count);
        foreach(KeyValuePair<Record,List<Record> > entry in index.idx)
            result.Add(new Table(entry.Key,entry.Value));
        return result;
    }

    /**
     * Returns records with an attribute list conforming to header
     * and taken from the list of records in this table. The returned
     * Records can be used as key to identify groups.
     */
    public List<Record> keys(Header header)
    {
        Index index=find_index(header);
        return index.keys();
    }

    public List<Record> keys(String[] s)
    {
        return keys(new Header(s));
    }

    /**
     * See groups method
     */
    public List<Table> groups(String[] s)
    {
        return groups(new Header(s));
    }

    public List<Table> groups(String a)
    {
        String [] h={a};
        return groups(h);
    }

    public List<Table> groups(String a, String b)
    {
        String [] h={a,b};
        return groups(h);
    }

    public List<Table> groups(String a, String b, String c)
    {
        String [] h={a,b,c};
        return groups(h);
    }

    public List<Table> groups(String a, String b, String c, String d)
    {
        String [] h={a,b,c,d};
        return groups(h);
    }

    public List<Table> groups(String a, String b, String c, String d, String e)
    {
        String [] h={a,b,c,d,e};
        return groups(h);
    }

    public List<Table> groups(String a, String b, String c, String d, String e, String f)
    {
        String [] h={a,b,c,d,e,f};
        return groups(h);
    }

    public List<Table> groups(String a, String b, String c, String d, String e, String f, String g)
    {
        String [] h={a,b,c,d,e,f,g};
        return groups(h);
    }

    /**
     * See keys method
     */
    public List<Record> keys(String a)
    {
        String[] h={a};
        return keys(h);
    }

    public List<Record> keys(String a, String b)
    {
        String[] h={a,b};
        return keys(h);
    }

    public List<Record> keys(String a, String b, String c)
    {
        String[] h={a,b,c};
        return keys(h);
    }

    public List<Record> keys(String a, String b, String c, String d)
    {
        String[] h={a,b,c,d};
        return keys(h);
    }

    public List<Record> keys(String a, String b, String c, String d, String e)
    {
        String[] h={a,b,c,d,e};
        return keys(h);
    }

    public List<Record> keys(String a, String b, String c, String d, String e, String f)
    {
        String[] h={a,b,c,d,e,f};
        return keys(h);
    }

    public List<Record> keys(String a, String b, String c, String d, String e, String f, String g)
    {
        String[] h={a,b,c,d,e,f,g};
        return keys(h);
    }

    /**
     * Adds a record to this table and updates the indices. Needless to say:
     * once a record is added it should not be changed since that will
     * mess up the indices
     */
    public void Add(Record r)
    {
        records.Add(r);
        foreach(KeyValuePair<Header,Index> index in indices)
            index.Value.Add(r);
    }

    /**
     * Returns a subtable that is identified by the key r. Throws a
     * KeyNotFoundException when no record matches r.
     */
    public Table group(Record r)
    {
        Index index=find_index(r.header());
        return new Table(r,index.values(r));
    }

    /**
     * If this table contains exactly one element, then
     * that element will be returned. All other situations
     * throw an InvalidOperationException. Assigning only
     * prints a message; the table is left untouched.
     */
    public Record single
    {
        set
        {
            Console.WriteLine("Cannot set a single value in a group");
        }
        get
        {
            if (records.Count!=1)
                throw new InvalidOperationException("Table does not have exactly one element");
            return records[0];
        }
    }

    /**
     * A table can be indexed with the [] operator.
     * The result are all values associated with the
     * provided attribute, one per record and in record order.
     * Assigning only prints a message; the table is left untouched.
     */
    public ArrayList this[String attr]
    {
        set
        {
            Console.Out.WriteLine("We do not set values in tables");
        }
        get
        {
            ArrayList result=new ArrayList(records.Count);
            foreach(Record record in records)
                result.Add(record[attr]);
            return result;
        }
    }

    /**
     * Returns the index for the given header, building and caching it
     * on first use.
     */
    public Index find_index(Header header)
    {
        Index result;
        if (!indices.TryGetValue(header,out result))
        {
            result=new Index(header,records);
            indices[header]=result;
        }
        return result;
    }

    /**
     * Lists all records, one per line, preceded by the key when this
     * table is a subtable with a non-empty key.
     */
    public override string ToString()
    {
        StringBuilder result=new StringBuilder();
        if (key.count()>0)
            result.Append("Key: ").Append(key).Append("\nValues:\n");
        foreach(Record record in records)
            result.Append(record).Append("\n");
        return result.ToString();
    }
};