<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0"
	xmlns:content="http://purl.org/rss/1.0/modules/content/"
	xmlns:wfw="http://wellformedweb.org/CommentAPI/"
	xmlns:dc="http://purl.org/dc/elements/1.1/"
	xmlns:atom="http://www.w3.org/2005/Atom"
	xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
	xmlns:slash="http://purl.org/rss/1.0/modules/slash/"
	>

<channel>
	<title>self.d-struct.org &#187; ruby</title>
	<atom:link href="http://self.d-struct.org/tag/ruby/feed" rel="self" type="application/rss+xml" />
	<link>http://self.d-struct.org</link>
	<description></description>
	<lastBuildDate>Mon, 23 Jan 2012 04:49:34 +0000</lastBuildDate>
	<language>en</language>
	<sy:updatePeriod>hourly</sy:updatePeriod>
	<sy:updateFrequency>1</sy:updateFrequency>
	<generator>http://wordpress.org/?v=3.2.1</generator>
		<item>
		<title>Handling Nested CDATA With Builder</title>
		<link>http://self.d-struct.org/361/handling-nested-cdata-with-builder</link>
		<comments>http://self.d-struct.org/361/handling-nested-cdata-with-builder#comments</comments>
		<pubDate>Tue, 21 Sep 2010 21:19:29 +0000</pubDate>
		<dc:creator>Chris Gansen</dc:creator>
				<category><![CDATA[work]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[xml]]></category>

		<guid isPermaLink="false">http://self.d-struct.org/?p=361</guid>
		<description><![CDATA[As noted by our associates at Atomic Object, XML doesn&#8217;t allow for nested &#60;![CDATA[&#8230;]]&#62; elements. In the course of rewriting some pieces of code, I developed the following Builder workaround to allow our application to export valid XML by breaking the nested CDATA elements into distinct chunks. When read back in via our Nokogiri-based parser, it [...]]]></description>
			<content:encoded><![CDATA[<p>As <a href="http://spin.atomicobject.com/2010/09/01/working-with-embedded-cdata-in-xml-documents" target="_blank">noted by our associates at Atomic Object</a>, XML doesn&#8217;t allow for nested &lt;![CDATA[&hellip;]]&gt; elements. In the course of rewriting some pieces of code, I developed the following <a href="http://builder.rubyforge.org/" target="_blank">Builder</a> workaround to allow our application to export valid XML by breaking the nested CDATA elements into distinct chunks. When read back in via our <a href="http://nokogiri.org/Nokogiri.html" target="_blank">Nokogiri</a>-based parser, it concatenates the values automagically, and the end result is clean, valid XML.</p>
<p>Fix code:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby" style="font-family:monospace;"><span style="color:#9966CC; font-weight:bold;">module</span> Builder
  <span style="color:#9966CC; font-weight:bold;">class</span> XmlMarkup <span style="color:#006600; font-weight:bold;">&lt;</span> XmlBase
&nbsp;
    <span style="color:#9966CC; font-weight:bold;">def</span> cdata_with_escaping!<span style="color:#006600; font-weight:bold;">&#40;</span>text<span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">if</span> text =~ <span style="color:#006600; font-weight:bold;">/</span><span style="color:#006600; font-weight:bold;">&#40;</span>\<span style="color:#006600; font-weight:bold;">&#93;</span>\<span style="color:#006600; font-weight:bold;">&#93;</span><span style="color:#006600; font-weight:bold;">&gt;</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">/</span>
        text.<span style="color:#CC0066; font-weight:bold;">gsub!</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#006600; font-weight:bold;">/</span><span style="color:#006600; font-weight:bold;">&#40;</span>\<span style="color:#006600; font-weight:bold;">&#93;</span>\<span style="color:#006600; font-weight:bold;">&#93;</span><span style="color:#006600; font-weight:bold;">&gt;</span><span style="color:#006600; font-weight:bold;">&#41;</span><span style="color:#006600; font-weight:bold;">/</span>, <span style="color:#996600;">&quot;]]]]&gt;&lt;![CDATA[&gt;&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      <span style="color:#9966CC; font-weight:bold;">end</span>
      cdata_without_escaping!<span style="color:#006600; font-weight:bold;">&#40;</span>text<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
    alias_method_chain <span style="color:#996600;">'cdata!'</span>, <span style="color:#996600;">'escaping'</span>
&nbsp;
  <span style="color:#9966CC; font-weight:bold;">end</span>
<span style="color:#9966CC; font-weight:bold;">end</span></pre></div></div>

<p>Sample output:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby" style="font-family:monospace;"><span style="color:#006600; font-weight:bold;">&gt;&gt;</span> xml = <span style="color:#6666ff; font-weight:bold;">Builder::XmlMarkup</span>.<span style="color:#9900CC;">new</span><span style="color:#006600; font-weight:bold;">&#40;</span>str<span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#006600; font-weight:bold;">&gt;&gt;</span> xml.<span style="color:#9900CC;">cdata</span>!<span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;&lt;![CDATA[Foo bar sna]]&gt;&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#006600; font-weight:bold;">&gt;&gt;</span> xml.<span style="color:#9900CC;">target</span>!
<span style="color:#006600; font-weight:bold;">=&gt;</span> <span style="color:#996600;">&quot;&lt;![CDATA[&lt;![CDATA[Foo bar sna]]]]&gt;&lt;![CDATA[&gt;]]&gt;&quot;</span>  <span style="color:#008000; font-style:italic;"># valid XML!</span>
<span style="color:#006600; font-weight:bold;">&gt;&gt;</span> xml.<span style="color:#9900CC;">cdata_without_escaping</span>!<span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;&lt;![CDATA[Foo bar sna]]&gt;&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#006600; font-weight:bold;">&gt;&gt;</span> xml.<span style="color:#9900CC;">target</span>!
<span style="color:#006600; font-weight:bold;">=&gt;</span> <span style="color:#996600;">&quot;&lt;![CDATA[&lt;![CDATA[Foo bar sna]]&gt;]]&gt;&quot;</span> <span style="color:#008000; font-style:italic;"># invalid XML!</span></pre></div></div>

<p>Sample parsing with Nokogiri:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby" style="font-family:monospace;"><span style="color:#006600; font-weight:bold;">&gt;&gt;</span> doc = <span style="color:#6666ff; font-weight:bold;">Nokogiri::XML</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;&lt;baz&gt;&lt;![CDATA[&lt;![CDATA[Foo bar sna]]]]&gt;&lt;![CDATA[&gt;]]&gt;&lt;/baz&gt;&quot;</span><span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#006600; font-weight:bold;">=&gt;</span> <span style="color:#008000; font-style:italic;">#&lt;Nokogiri::XML::Document:0x825aff3c name=&quot;document&quot; children=[#&lt;Nokogiri::XML::Element:0x825afc1c name=&quot;baz&quot; children=[#&lt;Nokogiri::XML::CDATA:0x825af99c &quot;&lt;![CDATA[Foo bar sna]]&gt;&quot;&gt;]&gt;]&gt;</span>
<span style="color:#006600; font-weight:bold;">&gt;&gt;</span> doc.<span style="color:#9900CC;">css</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'baz'</span><span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">first</span>.<span style="color:#9900CC;">content</span>
<span style="color:#006600; font-weight:bold;">=&gt;</span> <span style="color:#996600;">&quot;&lt;![CDATA[Foo bar sna]]&gt;&quot;</span></pre></div></div>

]]></content:encoded>
			<wfw:commentRss>http://self.d-struct.org/361/handling-nested-cdata-with-builder/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>Where to find old versions of Ruby</title>
		<link>http://self.d-struct.org/312/where-to-find-old-versions-of-ruby</link>
		<comments>http://self.d-struct.org/312/where-to-find-old-versions-of-ruby#comments</comments>
		<pubDate>Mon, 03 May 2010 22:41:34 +0000</pubDate>
		<dc:creator>Chris Gansen</dc:creator>
				<category><![CDATA[Uncategorized]]></category>
		<category><![CDATA[ruby]]></category>

		<guid isPermaLink="false">http://self.d-struct.org/?p=312</guid>
		<description><![CDATA[This post is as much for my reference as it is for frustrated folks trying to find non-1.9 versions for old Rails apps. Ruby 1.8.6 p399: ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.6-p399.tar.gz (and in zip and bz2) Ruby 1.8.7 p249: ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.7-p249.tar.gz (and in zip and bz2) And of course, browse the entire FTP archive for everything going back to 1.8.0.]]></description>
			<content:encoded><![CDATA[<p>This post is as much for my reference as it is for frustrated folks trying to find non-1.9 versions for old Rails apps.</p>
<p><strong>Ruby 1.8.6 p399</strong>: <a href="ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.6-p399.tar.gz">ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.6-p399.tar.gz</a> (and in <a href="ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.6-p399.zip">zip</a> and <a href="ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.6-p399.bz2">bz2</a>)</p>
<p><strong>Ruby 1.8.7 p249</strong>: <a href="ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.7-p249.tar.gz">ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.7-p249.tar.gz</a> (and in <a href="ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.7-p249.zip">zip</a> and <a href="ftp://ftp.ruby-lang.org/pub/ruby/1.8/ruby-1.8.7-p249.bz2">bz2</a>)</p>
<p>And of course, browse the <a href="ftp://ftp.ruby-lang.org/pub/ruby/1.8/">entire FTP archive</a> for everything going back to 1.8.0.</p>
]]></content:encoded>
			<wfw:commentRss>http://self.d-struct.org/312/where-to-find-old-versions-of-ruby/feed</wfw:commentRss>
		<slash:comments>0</slash:comments>
		</item>
		<item>
		<title>HOWTO: Remove Byte-order Mark with Ruby and Iconv</title>
		<link>http://self.d-struct.org/195/howto-remove-byte-order-mark-with-ruby-and-iconv</link>
		<comments>http://self.d-struct.org/195/howto-remove-byte-order-mark-with-ruby-and-iconv#comments</comments>
		<pubDate>Mon, 19 Oct 2009 20:24:32 +0000</pubDate>
		<dc:creator>Chris Gansen</dc:creator>
				<category><![CDATA[work]]></category>
		<category><![CDATA[ruby]]></category>
		<category><![CDATA[unicode]]></category>

		<guid isPermaLink="false">http://self.d-struct.org/?p=195</guid>
		<description><![CDATA[I&#8217;m working on a small project that involves loading a UTF-16LE (16-bit Unicode, Little Endian) CSV file, converting it to UTF-8 (normal Unicode, as it may be) with iconv, then parsing the values with FasterCSV. Everything was working fine except for loading the first column of data by the column header value. For example, given [...]]]></description>
			<content:encoded><![CDATA[<p>I&#8217;m working on a small project that involves loading a UTF-16LE (16-bit Unicode, Little Endian) CSV file, converting it to UTF-8 (normal Unicode, as it may be) with <a href="http://www.ruby-doc.org/stdlib/libdoc/iconv/rdoc/index.html">iconv</a>, then parsing the values with FasterCSV. Everything was working fine except for loading the first column of data by the column header value. For example, given data:</p>
<table style="width: 75%; border:1px">
<tbody>
<tr>
<th>First Name</th>
<th>Last Name</th>
<th>Email</th>
</tr>
<tr>
<td>Jimbo</td>
<td>Jones</td>
<td>jimbo.jones@example.com</td>
</tr>
</tbody>
</table>
<p>I could access column 2 (Last Name) as either <code>row.field("Last Name")</code> or <code>row.field(1)</code>. However, if I tried to access the first column using <code>row.field("First Name")</code>, it would return <code>nil</code>. <code>row.field(0)</code>, on the other hand, would return the proper value.</p>
<p>Hmmmm.</p>
<p>After some sleuthing, I examined the raw content of the string:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby" style="font-family:monospace;"><span style="color:#006600; font-weight:bold;">&#40;</span>rdb:<span style="color:#006666;">1</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#CC0066; font-weight:bold;">p</span> row.<span style="color:#9900CC;">headers</span>.<span style="color:#9900CC;">first</span>.<span style="color:#9900CC;">unpack</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'C*'</span><span style="color:#006600; font-weight:bold;">&#41;</span>
<span style="color:#006600; font-weight:bold;">&#91;</span><span style="color:#006666;">239</span>, <span style="color:#006666;">187</span>, <span style="color:#006666;">191</span>, <span style="color:#006666;">70</span>, <span style="color:#006666;">105</span>, <span style="color:#006666;">114</span>, <span style="color:#006666;">115</span>, <span style="color:#006666;">116</span>, <span style="color:#006666;">32</span>, <span style="color:#006666;">78</span>, <span style="color:#006666;">97</span>, <span style="color:#006666;">109</span>, <span style="color:#006666;">101</span><span style="color:#006600; font-weight:bold;">&#93;</span></pre></div></div>

<p>Ah, ha! The first three bytes are the <a href="http://en.wikipedia.org/wiki/Byte-order_mark">byte-order mark, or BOM</a>. Ruby, for whatever reason, does not strip it when reading a file as input, so it&#8217;s passed along in the input stream. When loading a file with <a href="http://fastercsv.rubyforge.org">FasterCSV</a>, it&#8217;ll keep those bytes in the key name, causing lookups by the first column key name to return nil.</p>
<p>I modified my file conversion code as follows:</p>

<div class="wp_syntax"><div class="code"><pre class="ruby" style="font-family:monospace;">  <span style="color:#9966CC; font-weight:bold;">def</span> convert_to_utf8
    <span style="color:#008000; font-style:italic;"># Data files are exported as Little Endian UTF-16. We need to parse as UTF-8</span>
    contents = <span style="color:#CC00FF; font-weight:bold;">File</span>.<span style="color:#CC0066; font-weight:bold;">open</span><span style="color:#006600; font-weight:bold;">&#40;</span>@file_name<span style="color:#006600; font-weight:bold;">&#41;</span>.<span style="color:#9900CC;">read</span>      
    <span style="color:#9966CC; font-weight:bold;">begin</span>
      converted = <span style="color:#CC00FF; font-weight:bold;">Iconv</span>.<span style="color:#9900CC;">iconv</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">'UTF-8'</span>, <span style="color:#996600;">'UTF-16LE'</span>, contents<span style="color:#006600; font-weight:bold;">&#41;</span>
      converted.<span style="color:#9900CC;">first</span>.<span style="color:#CC0066; font-weight:bold;">gsub!</span><span style="color:#006600; font-weight:bold;">&#40;</span><span style="color:#996600;">&quot;<span style="color:#000099;">\x</span>EF<span style="color:#000099;">\x</span>BB<span style="color:#000099;">\x</span>BF&quot;</span>, <span style="color:#996600;">''</span><span style="color:#006600; font-weight:bold;">&#41;</span> <span style="color:#008000; font-style:italic;"># strip the BOM (byte order mark) from the first line of input</span>
      output = <span style="color:#CC00FF; font-weight:bold;">File</span>.<span style="color:#CC0066; font-weight:bold;">open</span><span style="color:#006600; font-weight:bold;">&#40;</span>@file_name, <span style="color:#996600;">'w'</span><span style="color:#006600; font-weight:bold;">&#41;</span>
      output.<span style="color:#9900CC;">write</span><span style="color:#006600; font-weight:bold;">&#40;</span>converted<span style="color:#006600; font-weight:bold;">&#41;</span>
    <span style="color:#9966CC; font-weight:bold;">rescue</span> <span style="color:#6666ff; font-weight:bold;">Iconv::Failure</span>
      <span style="color:#CC0066; font-weight:bold;">puts</span> $!.<span style="color:#9900CC;">inspect</span>
    <span style="color:#9966CC; font-weight:bold;">end</span>
  <span style="color:#9966CC; font-weight:bold;">end</span></pre></div></div>

<p>And all is well in the world.</p>
]]></content:encoded>
			<wfw:commentRss>http://self.d-struct.org/195/howto-remove-byte-order-mark-with-ruby-and-iconv/feed</wfw:commentRss>
		<slash:comments>2</slash:comments>
		</item>
	</channel>
</rss>

