#!/usr/bin/env ruby
# Command-line front end for Nokogiri.
#
# Parses the document named by the first non-option argument (an http(s)/ftp
# URL or a local path), or standard input when no argument is given and
# stdin is not a terminal. The parsed document is stored in @doc and $_, and
# is then validated against a RelaxNG schema (--rng), handed to a one-line
# script (-e), or explored in an interactive console.

require 'optparse'
require 'open-uri'
require 'uri'
require 'rubygems'
require 'nokogiri'
autoload :IRB, 'irb'

# Parser entry point used for the document; replaced by --type.
parse_class = Nokogiri
# Encoding handed to the parser; stays nil unless -E/--encoding is given.
encoding = nil

# This module provides some tunables with the nokogiri CLI for use in
# your ~/.nokogirirc.
module Nokogiri::CLI
  class << self
    # Specify the console engine, defaulted to IRB.
    #
    # call-seq:
    #   require 'pry'
    #   Nokogiri::CLI.console = Pry
    attr_writer :console

    # Returns the console engine. A Symbol is resolved to the constant of
    # that name at call time, so the default :IRB is only looked up (and
    # autoloaded) when the console is actually requested.
    def console
      case @console
      when Symbol
        Kernel.const_get(@console)
      else
        @console
      end
    end

    # Path of the initialization file loaded before the document is parsed.
    attr_accessor :rcfile
  end

  self.rcfile = File.expand_path('~/.nokogirirc')
  self.console = :IRB
end

# NOTE: the top-level locals (opts, parse_class, encoding, url, uri) are
# visible to -e scripts through `binding`, so their names are kept stable.
opts = OptionParser.new do |parser|
  parser.banner = "Nokogiri: an HTML, XML, SAX, and Reader parser"
  parser.define_head "Usage: nokogiri <uri|path> [options]"
  parser.separator ""
  parser.separator "Examples:"
  parser.separator " nokogiri http://www.ruby-lang.org/"
  parser.separator " nokogiri ./public/index.html"
  parser.separator " curl -s http://nokogiri.org | nokogiri -e'p $_.css(\"h1\").length'"
  parser.separator ""
  parser.separator "Options:"

  parser.on("--type type", "Parse as type: xml or html (default: auto)", [:xml, :html]) do |v|
    parse_class = {:xml => Nokogiri::XML, :html => Nokogiri::HTML}[v]
  end

  parser.on("-C file", "Specifies initialization file to load (default #{Nokogiri::CLI.rcfile})") do |v|
    Nokogiri::CLI.rcfile = v
  end

  parser.on("-E", "--encoding encoding", "Read as encoding (default: #{encoding || 'none'})") do |v|
    encoding = v
  end

  parser.on("-e command", "Specifies script from command-line.") do |v|
    @script = v
  end

  parser.on("--rng <uri|path>", "Validate using this rng file.") do |v|
    # URI.open accepts both URLs and local paths; bare Kernel#open no longer
    # opens URLs on Ruby 3.0+.
    @rng = URI.open(v) { |f| Nokogiri::XML::RelaxNG(f) }
  end

  parser.on_tail("-?", "--help", "Show this message") do
    puts parser
    exit
  end

  parser.on_tail("-v", "--version", "Show version") do
    puts Nokogiri::VersionInfo.instance.to_markdown
    exit
  end
end

opts.parse!

url = ARGV.shift

# Nothing to parse: no document argument and stdin is an interactive terminal.
if url.to_s.strip.empty? && $stdin.tty?
  puts opts
  exit 1
end

load Nokogiri::CLI.rcfile if File.file?(Nokogiri::CLI.rcfile)

if url
  uri = begin
    URI(url)
  rescue URI::InvalidURIError
    # Not parseable as a URI (e.g. a path containing spaces): treat it as a
    # local path.
    url
  end

  case uri
  when URI::HTTP, URI::FTP
    # open-uri adds #read to these URI classes.
    @doc = parse_class.parse(uri.read, url, encoding)
  else
    # File.read closes the handle and, unlike Kernel#open, never treats a
    # leading "|" as a command to execute.
    @doc = parse_class.parse(File.read(url), nil, encoding)
  end
else
  @doc = parse_class.parse($stdin, nil, encoding)
end

# Expose the document as $_ so one-liners can write `$_.css(...)`.
$_ = @doc

if @rng
  @rng.validate(@doc).each { |error| puts error.message }
elsif @script
  # The script comes from the invoking user's own command line (-e), so it
  # is evaluated as-is; '<main>' is the file label shown in backtraces.
  eval @script, binding, '<main>'
else
  puts "Your document is stored in @doc..."
  Nokogiri::CLI.console.start
end