sábado, 15 de agosto de 2009

Mono, C# y Archivos ODS



Tratando de acceder a un archivo en formato xls de Excel con C#, tuve algunos problemas accediendo al contenido de la celda de cada columna de la hoja de cálculo. Así pues empece a investigar otra manera de hacerlo rápido y usando por supuesto C#. Con OpenOffice.org Calc encontré una manera de acceder a ello despues de varias horas probando el acceso al contenido de las celdas. El inconveniente es que no había mucho código para leer y documentarse un poco.

Como estoy aprendiendo el perfil de tester, y usamos el TFS para reportar Bugs o incidencias, y de ahí generamos reportes de los mismos en archivos xls. Quería averiguar la manera de acceder al contenido de las celdas de cada columna para comparar si este bug estaba duplicado o no, o bien, si este bug tenía una relación con otro bug que se le pareciera en su contenido de acuerdo a un porcentaje de parecido.

Usando Fedora 11, Mono 2.x y OOo Calc, lo logré. Aquí el código fuente del archivo bugs.cs:


/*
* Copyright (C) Gerardo Gonzalez Cruz gerardogc2378@gmail.com July 2009.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/

using System;
using System.IO;
using Gtk;
using System.Xml;
using System.Collections;
using System.Collections.Generic;
using System.Text.RegularExpressions;

namespace XMLBugs
{
public class Bugs
{
private string currentDescription = string.Empty;
private string nextDescription = string.Empty;
private string currentBugID = string.Empty;
private string nextBugID = string.Empty;
private string currentSteps = string.Empty;
private string nextSteps = string.Empty;
private string error = string.Empty;
private double diff = -1;
private string similarBugs = string.Empty;
private string equalBugs = string.Empty;
private Gtk.TreeStore bugsTreeStore = new Gtk.TreeStore(typeof (string), typeof (string), typeof (string));
private Gtk.TreeIter iterBug;
private Stack<string> stack = new Stack<string>();

public static void Main()
{
new Bugs();
}

public Bugs()
{
readXMLFile();
//saveToFile();
setBinding();
}

private void saveToFile()
{
string fileName = getFileName();
TextWriter twEqualBugs = new StreamWriter("equal" + fileName);
TextWriter twSimilarBugs = new StreamWriter("similar" + fileName);

twEqualBugs.WriteLine(equalBugs);
twSimilarBugs.WriteLine(similarBugs);

twEqualBugs.Close();
twSimilarBugs.Close();
}

private void setBinding()
{
Application.Init();

Gtk.Window window = new Gtk.Window("Bugs");
window.SetSizeRequest(800,600);

Gtk.ScrolledWindow sw = new Gtk.ScrolledWindow();

Gtk.TreeView tree = new Gtk.TreeView();

sw.Add(tree);

window.Add(sw);

Gtk.TreeViewColumn statusColumn = new Gtk.TreeViewColumn();
statusColumn.Title = "Estado";

Gtk.CellRendererText statusCell = new Gtk.CellRendererText();

statusColumn.PackStart(statusCell, true);

Gtk.TreeViewColumn bugIDColumn = new Gtk.TreeViewColumn();
bugIDColumn.Title = "BugID";

Gtk.CellRendererText bugIDCell = new Gtk.CellRendererText();

bugIDColumn.PackStart(bugIDCell, true);

Gtk.TreeViewColumn diffColumn = new Gtk.TreeViewColumn();
diffColumn.Title = "% Similitud";

Gtk.CellRendererText diffCell = new Gtk.CellRendererText();

diffColumn.PackStart(diffCell, true);

tree.AppendColumn(statusColumn);
tree.AppendColumn(bugIDColumn);
tree.AppendColumn(diffColumn);

statusColumn.AddAttribute(statusCell, "text", 0);
bugIDColumn.AddAttribute(bugIDCell, "text", 1);
diffColumn.AddAttribute(diffCell, "text", 2);

statusColumn.SetCellDataFunc(statusCell, new Gtk.TreeCellDataFunc(RenderStatus));

tree.Model = bugsTreeStore;

window.DeleteEvent += new DeleteEventHandler(delete_window);
window.ShowAll();

Application.Run();
}

private void RenderStatus(Gtk.TreeViewColumn column, Gtk.CellRenderer cell, Gtk.TreeModel model, Gtk.TreeIter iter)
{
if(model.GetValue(iter, 0).ToString() == "Duplicado:")
(cell as Gtk.CellRendererText).Foreground = "red";
else
(cell as Gtk.CellRendererText).Foreground = "black";
}

private static void delete_window(System.Object o, DeleteEventArgs args)
{
Application.Quit();
args.RetVal = true;
}

private void readXMLFile()
{
try
{
XmlDocument xDoc = new XmlDocument();
xDoc.Load("content.xml");
XmlNodeList xnlBugs = xDoc.GetElementsByTagName("table:table");
XmlNodeList xnlAllRows = ((XmlElement)xnlBugs[0]).GetElementsByTagName("table:table-row");

for(int currentRow = 1; currentRow < xnlAllRows.Count; currentRow++)
{
XmlNodeList xnlCurrentRow = ((XmlElement)xnlAllRows[currentRow]).GetElementsByTagName("table:table-cell");

for(int currentCol = 0; currentCol < xnlCurrentRow.Count; currentCol++)
{
switch(currentCol)
{
case 0:
{
XmlNodeList xnlColumn = ((XmlElement)xnlCurrentRow[currentCol]).GetElementsByTagName("text:p");

for(int line = 0; line < xnlColumn.Count; line++)
currentBugID = xnlColumn[line].InnerText.ToString();

break;
}
case 1:
{
XmlNodeList xnlColumn = ((XmlElement)xnlCurrentRow[currentCol]).GetElementsByTagName("text:p");

for(int line = 0; line < xnlColumn.Count; line++)
currentDescription += xnlColumn[line].InnerText.ToString();

break;
}
case 2:
{
XmlNodeList xnlColumn = ((XmlElement)xnlCurrentRow[currentCol]).GetElementsByTagName("text:p");

for(int line = 0; line < xnlColumn.Count; line++)
currentSteps += xnlColumn[line].InnerText.ToString();

break;
}
default: break;
}
}

if(currentBugID == string.Empty)
{
currentBugID = string.Empty;
currentDescription = string.Empty;
currentSteps = string.Empty;
continue;
}
else
Console.WriteLine("Processing: {0}", currentBugID);

for(int nextRow = currentRow + 1; nextRow < xnlAllRows.Count; nextRow++)
{
XmlNodeList xnlNextRow = ((XmlElement)xnlAllRows[nextRow]).GetElementsByTagName("table:table-cell");

for(int nextCol = 0; nextCol < xnlNextRow.Count; nextCol++)
{
switch(nextCol)
{
case 0:
{
XmlNodeList xnlColumn = ((XmlElement)xnlNextRow[nextCol]).GetElementsByTagName("text:p");

for(int line = 0; line < xnlColumn.Count; line++)
nextBugID = xnlColumn[line].InnerText.ToString();

break;
}
case 1:
{
XmlNodeList xnlColumn = ((XmlElement)xnlNextRow[nextCol]).GetElementsByTagName("text:p");

for(int line = 0; line < xnlColumn.Count; line++)
nextDescription += xnlColumn[line].InnerText.ToString();

break;
}
case 2:
{
XmlNodeList xnlColumn = ((XmlElement)xnlNextRow[nextCol]).GetElementsByTagName("text:p");

for(int line = 0; line < xnlColumn.Count; line++)
nextSteps += xnlColumn[line].InnerText.ToString();

break;
}
default: break;
}
}

if(nextBugID == string.Empty)
{
nextBugID = string.Empty;
nextDescription = string.Empty;
nextSteps = string.Empty;
continue;
}

if(currentDescription.Equals(nextDescription)
&& currentSteps.Equals(nextSteps))
{
if(!stack.Contains(currentBugID))
{
iterBug = bugsTreeStore.AppendValues(currentBugID, "", "");
stack.Push(currentBugID);
}

equalBugs += currentBugID + " >> " + nextBugID + "\n";
bugsTreeStore.AppendValues(iterBug, "Duplicado:", nextBugID, "100%");
}
else
{
if(isSimilar(currentDescription + " " + currentSteps,
nextDescription + " " + nextSteps))
{
if(!stack.Contains(currentBugID))
{
iterBug = bugsTreeStore.AppendValues(currentBugID, "", "");
stack.Push(currentBugID);
}

similarBugs += currentBugID + " >> " + nextBugID + "\n";
bugsTreeStore.AppendValues(iterBug, "Similar:", nextBugID, diff.ToString() + "%");
}
}

nextBugID = string.Empty;
nextDescription = string.Empty;
nextSteps = string.Empty;
diff = -1;
}

currentBugID = string.Empty;
currentDescription = string.Empty;
currentSteps = string.Empty;
}

xnlAllRows = null;
xnlBugs = null;
xDoc = null;
}
catch(Exception err)
{
this.error = err.Message;
Console.WriteLine(error);
}
}

private bool isSimilar(string A, string B)
{
if(A == string.Empty
|| B == string.Empty)
{
diff = -1;
return false;
}
else
{
string[] words = null;
string finalString = string.Empty;

switch(A.Length > B.Length)
{
case true:
{
words = B.ToLower().Split(' ');
finalString = A.ToLower();

for(int i = 0; i < words.Length; i++)
{
if(words[i] != string.Empty)
finalString = Regex.Replace(finalString, @"\b(" + removeRareCharacter(words[i]) + @")\b", "~");
}

diff = (finalString.Split('~').Length * 100) / A.Split(' ').Length;

words = null;
finalString = string.Empty;
break;
}
case false:
{
words = A.ToLower().Split(' ');
finalString = B.ToLower();

for(int i = 0; i < words.Length; i++)
{
if(words[i] != string.Empty)
finalString = Regex.Replace(finalString, @"\b(" + removeRareCharacter(words[i]) + @")\b", "~");
}

diff = (finalString.Split('~').Length * 100) / B.Split(' ').Length;

words = null;
finalString = string.Empty;
break;
}
}

if(diff >= 85 && diff <= 100)
return true;
else
return false;
}
}

private string removeRareCharacter(string Obj)
{
string _Obj = Obj;
string[] invalidCharaters = {"\"", ".", ";", ",", "{", "}", "[", "]", "\'", "(", ")", "*", ".-", "-"};

for(int i = 0; i < invalidCharaters.Length; i++)
_Obj = _Obj.Replace(invalidCharaters[i], string.Empty);

return _Obj;
}

private string getFileName()
{
return "Bugs_" +
DateTime.Today.ToShortDateString().Replace("/", ".") +
"_" +
System.DateTime.Now.Hour +
"." +
System.DateTime.Now.Minute +
".txt";
}
}
}


Los pasos que he seguido para ello son los siguientes:

1. Obtengo el arhivo XLS.
2. Lo abro con OOo Calc y lo guardo como ODS.
3. Extraigo del mismo fichero ODS un arhivo de nombre content.xml
4. Lo coloco junto al ejecutable que genera Mono y lo ejecuto.
5. Fin.