Register now and start sharing your code snippets.
-->

How to parse CSV data with Ruby

Ruby posted 2 months ago by christian

Parsing with plain Ruby

   filename = 'data.csv'

   # Naive CSV reading: every line is split on commas, so quoted fields are not
   # handled. The block form of File.open closes the file when the block exits,
   # including when the loop is left early via `break`.
   File.open(filename, 'r') do |file|
     # each_line splits on "\n" by default. (A single-quoted '\n' would be a
     # literal backslash followed by "n", not a newline.)
     file.each_line do |row|
       # chomp first so the last column does not carry the trailing newline.
       columns = row.chomp.split(",")

       # Stop once more than 10 lines have been read.
       break if file.lineno > 10
     end
   end

This approach doesn’t support quoted fields, so a quoted value that contains the separator is split incorrectly.

Parsing with the CSV library

   require 'csv'

   # Read data.csv row by row, using ';' as the column separator, and print each
   # parsed row (an Array of fields).
   #
   # CSV.foreach is used because, on Ruby 1.9 and later, CSV.open with a block
   # yields the CSV object itself rather than individual rows, and the separator
   # must be passed as the :col_sep option instead of a positional argument.
   CSV.foreach('data.csv', col_sep: ';') do |row|
     p row
   end

Parsing with the FasterCSV library

   require 'rubygems'
   require 'faster_csv'

   # Parser settings: double-quote as the quote character, semicolon-separated
   # columns, and automatic detection of the row separator.
   csv_options = { :quote_char => '"', :col_sep => ';', :row_sep => :auto }

   # Print the first field of the first row, then stop reading.
   FasterCSV.foreach("data.csv", csv_options) do |fields|
     puts fields[0]
     break
   end

Tagged csv, parse, ruby, fastercsv

How to parse an RSS or Atom feed with the ROME Java library

Java posted 4 months ago by christian

This is a simple example of how to use the ROME library to parse feeds:

   1  import com.sun.syndication.io.*;
   2  import com.sun.syndication.feed.synd.*;
   3  import java.net.URL;
   4  import java.util.*;
   5  
   6  public class RomeParserTest {
   7  
   8  	public static void main(String args[]) {
   9  		try {
  10  			SyndFeedInput sfi = new SyndFeedInput();
  11  
  12  			String urls[] = {
  13  				"...", 
  14  				"..." 
  15  			};
  16  			
  17  			for(String url:urls) {
  18  				SyndFeed feed = sfi.build(new XmlReader(new URL(url)));
  19  
  20  				List entries = feed.getEntries();
  21  
  22  				System.out.println(feed.getTitle());			
  23  				System.out.println(entries.size());
  24  			}
  25  		} catch (Exception ex) {
  26  			throw new RuntimeException(ex);
  27  		}
  28  	}
  29  }

Tagged rome, java, atom, rss, feed, parse

How to use Ruby and SimpleRSS to parse RSS and Atom feeds

Ruby posted 7 months ago by christian

This script is an example of how to use the SimpleRSS gem to parse an RSS feed.

The script can easily be modified to support conditional GETs. It also detects the feed’s character encoding and converts the feed to UTF-8.

   require 'net/http'
   require 'net/https'
   require 'rubygems'
   require 'simple-rss'

   # Fetches a feed over HTTP(S), detects its character encoding, converts the
   # body to UTF-8 and prints the title of every item.

   url = URI.parse('http://hbl.fi/rss.xml')

   http = Net::HTTP.new(url.host, url.port)

   http.open_timeout = http.read_timeout = 10  # Set open and read timeout to 10 seconds
   http.use_ssl = (url.scheme == "https")

   # The two conditional-GET values below are placeholders; replace them with
   # the values saved from the previous response for this feed.
   headers = {
     'User-Agent'          => 'Mozilla/5.0 (Macintosh; U; Intel Mac OS X; en-US; rv:1.8.1.12) Gecko/20080201 Firefox/2.0.0.12',
     'If-Modified-Since'   => 'store in a database and set on each request',
     'If-None-Match'       => 'store in a database and set on each request'
   }

   # Net::HTTP#get returns only the response object on Ruby 1.9.2 and later, so
   # the body is read from the response. request_uri keeps the query string and
   # is never empty, unlike path.
   response = http.get(url.request_uri, headers)
   body = response.body.to_s

   # First choice: the encoding named in the XML declaration (nil if absent).
   encoding = body[/^<\?xml [^>]*encoding="([^\"]*)"[^>]*\?>/, 1]

   if encoding.nil? || encoding.empty?
     # Second choice: the charset parameter of the Content-Type header.
     charset = response["Content-Type"].to_s[/charset=([\w\d-]+)/, 1]

     if charset
       # Assign before printing so the message shows the detected value.
       encoding = charset.downcase
       puts "Feed #{url} is #{encoding} according to Content-Type header"
     else
       puts "Unable to detect content encoding for #{url}, using default."
       encoding = "ISO-8859-1"
     end
   else
     puts "Feed #{url} is #{encoding} according to XML"
   end

   # Transcode from the detected encoding to UTF-8. If this raises on bad input,
   # pass :invalid => :replace, :undef => :replace to encode.
   body = body.encode('UTF-8', encoding)

   feed = SimpleRSS.parse(body)

   feed.items.each do |item|
     puts item.title
   end

Tagged rss, atom, parse, ruby, simplerss, encoding, utf-8