diff --git a/README.md b/README.md index 7a34d29..311ed86 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,8 @@ # Ruby CSS Parser [![Build Status](https://github.com/premailer/css_parser/workflows/Run%20css_parser%20CI/badge.svg)](https://github.com/ojab/css_parser/actions?query=workflow%3A%22Run+css_parser+CI%22) [![Gem Version](https://badge.fury.io/rb/css_parser.svg)](https://badge.fury.io/rb/css_parser) -Load, parse and cascade CSS rule sets in Ruby. +Load, parse and cascade CSS rule sets in Ruby. + +If you are looking for a pure CSS stylesheet parser/tokenizer/lexer, have a look at [crass](https://rubygems.org/gems/crass) or [syntax_tree-css](https://rubygems.org/gems/syntax_tree-css). # Setup @@ -10,35 +12,41 @@ gem install css_parser # Usage +You create a document with `CssParser::Document.new` and can then start loading CSS into it. The main methods for adding CSS are: `load_uri!` (loads a URL and follows @imports relative to the full URL), `load_file!` (loads a file and follows @imports relative to the directory of the loaded file) and `load_string!` (loads a block of CSS). All of these APIs try to make all URLs absolute. + +CssParser::Document -> Wrapper that holds all the rules in one block +CssParser::RuleSet -> Wrapper that holds each rule, like `.a, .b { color: hotpink; }`. 
Notice that this example has two selectors, `.a` and `.b` + + ```Ruby require 'css_parser' include CssParser -parser = CssParser::Parser.new -parser.load_uri!('http://example.com/styles/style.css') +document = CssParser::Document.new +document.load_uri!('http://example.com/styles/style.css') -parser = CssParser::Parser.new -parser.load_uri!('file://home/user/styles/style.css') +document = CssParser::Document.new +document.load_uri!('file://home/user/styles/style.css') # load a remote file, setting the base_uri and media_types -parser.load_uri!('../style.css', {base_uri: 'http://example.com/styles/inc/', media_types: [:screen, :handheld]}) +document.load_uri!('../style.css', {base_uri: 'http://example.com/styles/inc/', media_types: ['screen', 'handheld']}) # load a local file, setting the base_dir and media_types -parser.load_file!('print.css', '~/styles/', :print) +document.load_file!('print.css', base_dir: '~/styles/', media_types: 'print') # load a string -parser = CssParser::Parser.new -parser.load_string! 'a { color: hotpink; }' +document = CssParser::Document.new +document.load_string! 'a { color: hotpink; }' # lookup a rule by a selector -parser.find_by_selector('#content') +document.find_by_selector('#content') #=> 'font-size: 13px; line-height: 1.2;' # lookup a rule by a selector and media type -parser.find_by_selector('#content', [:screen, :handheld]) +document.find_by_selector('#content', ['screen', 'handheld']) # iterate through selectors by media type -parser.each_selector(:screen) do |selector, declarations, specificity| +document.each_selector('screen') do |selector, declarations, specificity| ... 
end @@ -47,24 +55,24 @@ css = <<-EOT body { margin: 0 1em; } EOT -parser.add_block!(css) +document.add_block!(css) # output all CSS rules in a single stylesheet -parser.to_s +document.to_s => #content { font-size: 13px; line-height: 1.2; } body { margin: 0 1em; } # capturing byte offsets within a file -parser.load_uri!('../style.css', {base_uri: 'http://example.com/styles/inc/', capture_offsets: true) -content_rule = parser.find_rule_sets(['#content']).first +document.load_uri!('../style.css', {base_uri: 'http://example.com/styles/inc/', capture_offsets: true}) +content_rule = document.find_rule_sets(['#content']).first content_rule.filename #=> 'http://example.com/styles/styles.css' content_rule.offset #=> 10703..10752 # capturing byte offsets within a string -parser.load_string!('a { color: hotpink; }', {filename: 'index.html', capture_offsets: true) -content_rule = parser.find_rule_sets(['a']).first +document.load_string!('a { color: hotpink; }', {filename: 'index.html', capture_offsets: true}) +content_rule = document.find_rule_sets(['a']).first content_rule.filename #=> 'index.html' content_rule.offset diff --git a/Rakefile b/Rakefile index 4ae5610..637f435 100644 --- a/Rakefile +++ b/Rakefile @@ -31,15 +31,15 @@ task :benchmark do complex_css_path = fixtures_dir.join('complex.css').to_s.freeze Benchmark.ips do |x| - x.report('import1.css loading') { CssParser::Parser.new.load_file!(import_css_path) } - x.report('complex.css loading') { CssParser::Parser.new.load_file!(complex_css_path) } + x.report('import1.css loading') { CssParser::Document.new.load_file!(import_css_path) } + x.report('complex.css loading') { CssParser::Document.new.load_file!(complex_css_path) } end puts - report = MemoryProfiler.report { CssParser::Parser.new.load_file!(import_css_path) } + report = MemoryProfiler.report { CssParser::Document.new.load_file!(import_css_path) } puts "Loading `import1.css` allocated #{report.total_allocated} objects, #{report.total_allocated_memsize / 1024} 
KiB" - report = MemoryProfiler.report { CssParser::Parser.new.load_file!(complex_css_path) } + report = MemoryProfiler.report { CssParser::Document.new.load_file!(complex_css_path) } puts "Loading `complex.css` allocated #{report.total_allocated} objects, #{report.total_allocated_memsize / 1024} KiB" end diff --git a/lib/css_parser.rb b/lib/css_parser.rb index f89bf06..411f107 100644 --- a/lib/css_parser.rb +++ b/lib/css_parser.rb @@ -10,10 +10,13 @@ require 'crass' require 'css_parser/version' +require 'css_parser/http_read_url' +require 'css_parser/file_resource' require 'css_parser/rule_set' require 'css_parser/rule_set/declarations' require 'css_parser/regexps' -require 'css_parser/parser' +require 'css_parser/parser_fx' +require 'css_parser/document' module CssParser class Error < StandardError; end @@ -58,8 +61,6 @@ class EmptyValueError < Error; end # TODO: declaration_hashes should be able to contain a RuleSet # this should be a Class method def self.merge(*rule_sets) - @folded_declaration_cache = {} - # in case called like CssParser.merge([rule_set, rule_set]) rule_sets.flatten! if rule_sets[0].is_a?(Array) @@ -154,11 +155,4 @@ def self.convert_uris(css, base_uri) "url('#{uri}')" end end - - def self.sanitize_media_query(raw) - mq = raw.to_s.gsub(/\s+/, ' ') - mq.strip! - mq = 'all' if mq.empty? - mq.to_sym - end end diff --git a/lib/css_parser/parser.rb b/lib/css_parser/document.rb similarity index 56% rename from lib/css_parser/parser.rb rename to lib/css_parser/document.rb index b2e0e59..3062763 100644 --- a/lib/css_parser/parser.rb +++ b/lib/css_parser/document.rb @@ -1,73 +1,23 @@ # frozen_string_literal: true module CssParser - # Exception class used for any errors encountered while downloading remote files. - class RemoteFileError < IOError; end - - # Exception class used if a request is made to load a CSS file more than once. 
- class CircularReferenceError < StandardError; end - - # We have a Parser class which you create and instance of but we have some - # functions which is nice to have outside of this instance - # - # Intended as private helpers for lib. Breaking changed with no warning - module ParserFx - # Receives properties from a style_rule node from crass. - def self.create_declaration_from_properties(properties) - declarations = RuleSet::Declarations.new - - properties.each do |child| - case child - in node: :property, value: '' # nothing, happen for { color:green; color: } - in node: :property - declarations.add_declaration!( - child[:name], - RuleSet::Declarations::Value.new(child[:value], important: child[:important]) - ) - in node: :whitespace # nothing - in node: :semicolon # nothing - in node: :error # nothing - end - end - - declarations - end - - # it is expecting the selector tokens from node: :style_rule, not just - # from Crass::Tokenizer.tokenize(input) - def self.split_selectors(tokens) - tokens - .each_with_object([[]]) do |token, sum| - case token - in node: :comma - sum << [] - else - sum.last << token - end - end - end - end - - # == Parser class + # == Document class # # All CSS is converted to UTF-8. # - # When calling Parser#new there are some configuaration options: + # When calling Document#new there are some configuration options: # [absolute_paths] Convert relative paths to absolute paths (href, src and url(''). Boolean, default is false. # [import] Follow @import rules. Boolean, default is true. # [io_exceptions] Throw an exception if a link can not be found. Boolean, default is true. - class Parser - USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (https://github.com/premailer/css_parser)".freeze - MAX_REDIRECTS = 3 - - # Array of CSS files that have been loaded. - attr_reader :loaded_uris + class Document + module Util + def self.ensure_media_types(media_types) + Array(media_types) + .tap { raise ArgumentError unless _1.all? 
{ |type| type.is_a?(String) || type == :all } } + end + end - #-- - # Class variable? see http://www.oreillynet.com/ruby/blog/2007/01/nubygems_dont_use_class_variab_1.html - #++ - @folded_declaration_cache = {} - class << self; attr_reader :folded_declaration_cache; end + USER_AGENT = "Ruby CSS Parser/#{CssParser::VERSION} (https://github.com/premailer/css_parser)".freeze def initialize(options = {}) @options = { @@ -79,21 +29,50 @@ def initialize(options = {}) user_agent: USER_AGENT }.merge(options) + @options[:http_resource] ||= CssParser::HTTPReadURL + .new(agent: @options[:user_agent], + io_exceptions: @options[:io_exceptions]) + @options[:file_resource] ||= CssParser::FileResource + .new(io_exceptions: @options[:io_exceptions]) + # array of RuleSets @rules = [] + end - @redirect_count = nil + # Iterate through RuleSet objects. + # + # +media_types+ can be a symbol or an array of media queries (:all or string). + def each_rule_set(media_types = :all) # :yields: rule_set, media_types + return to_enum(__method__, media_types) unless block_given? - @loaded_uris = [] + media_types = Util.ensure_media_types(media_types) + @rules.each do |block| + if media_types.include?(:all) or block[:media_types].any? { |mt| media_types.include?(mt) } + yield(block[:rules], block[:media_types]) + end + end + end + + # Iterate through CSS selectors. + # + # The difference between each_rule_set and this method is that this method + # exposes each selector to to the rule. + # + # +media_types+ can be a symbol or an array of media queries (:all or string). + # See RuleSet#each_selector for +options+. + def each_selector(all_media_types = :all, options = {}) # :yields: selectors, declarations, specificity, media_types + return to_enum(__method__, all_media_types, options) unless block_given? - # unprocessed blocks of CSS - @blocks = [] - reset! 
+ each_rule_set(all_media_types) do |rule_set, media_types| + rule_set.each_selector(options) do |selectors, declarations, specificity| + yield selectors, declarations, specificity, media_types + end + end end # Get declarations by selector. # - # +media_types+ are optional, and can be a symbol or an array of symbols. + # +media_types+ are optional, and can be a symbol or an array of media queries (:all or string). # The default value is :all. # # ==== Examples @@ -132,14 +111,81 @@ def find_rule_sets(selectors, media_types = :all) rule_sets end + # A hash of { :media_query => rule_sets } + def rules_by_media_query + rules_by_media = {} + @rules.each do |block| + block[:media_types].each do |mt| + unless rules_by_media.key?(mt) + rules_by_media[mt] = [] + end + rules_by_media[mt] << block[:rules] + end + end + + rules_by_media + end + + # Load a remote CSS file. + # + # You can also pass in file://test.css + # + # See add_block! for options. + def load_uri!(uri, options = {}) + uri = Addressable::URI.parse(uri) unless uri.respond_to? :scheme + + opts = {base_uri: nil, media_types: :all} + opts.merge!(options) + + if uri.scheme == 'file' or uri.scheme.nil? + uri.path = File.expand_path(uri.path) + uri.scheme = 'file' + end + + opts[:base_uri] = uri if opts[:base_uri].nil? + + # pass on the uri if we are capturing file offsets + opts[:filename] = uri.to_s if opts[:capture_offsets] + + src, = @options[:http_resource].read_remote_file(uri) # skip charset + + add_block!(src, opts) if src + end + + # Load a local CSS file. + def load_file!(file_name, options = {}) + opts = {base_dir: nil, media_types: :all} + opts.merge!(options) + + file_path = @options[:file_resource] + .find_file(file_name, base_dir: opts[:base_dir]) + # we we cant read the file it's nil + return if file_path.nil? 
+ + src = File.read(file_path) + + opts[:filename] = file_path if opts[:capture_offsets] + opts[:base_dir] = File.dirname(file_path) + + add_block!(src, opts) + end + + # Load a local CSS string. + def load_string!(src, options = {}) + opts = {base_dir: nil, media_types: :all} + opts.merge!(options) + + add_block!(src, opts) + end + # Add a raw block of CSS. # # In order to follow +@import+ rules you must supply either a # +:base_dir+ or +:base_uri+ option. # - # Use the +:media_types+ option to set the media type(s) for this block. Takes an array of symbols. + # Use the +:media_types+ option to set the media type(s) for this block. Takes an media queries (:all or string). # - # Use the +:only_media_types+ option to selectively follow +@import+ rules. Takes an array of symbols. + # Use the +:only_media_types+ option to selectively follow +@import+ rules. Takes an media queries (:all or string). # # ==== Example # css = <<-EOT @@ -150,22 +196,19 @@ def find_rule_sets(selectors, media_types = :all) # } # EOT # - # parser = CssParser::Parser.new + # parser = CssParser::Document.new # parser.add_block!(css) def add_block!(block, options = {}) - options = {base_uri: nil, base_dir: nil, charset: nil, media_types: :all, only_media_types: :all}.merge(options) - options[:media_types] = [options[:media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt) } - options[:only_media_types] = [options[:only_media_types]].flatten.collect { |mt| CssParser.sanitize_media_query(mt) } + options = {base_uri: nil, base_dir: nil, charset: nil, media_types: [:all], only_media_types: [:all]}.merge(options) + options[:media_types] = Util.ensure_media_types(options[:media_types]) + options[:only_media_types] = Util.ensure_media_types(options[:only_media_types]) # TODO: Would be nice to skip this step too if options[:base_uri] and @options[:absolute_paths] block = CssParser.convert_uris(block, options[:base_uri]) end - current_media_queries = [:all] - if options[:media_types] - 
current_media_queries = options[:media_types].flatten.collect { |mt| CssParser.sanitize_media_query(mt) } - end + current_media_queries = Util.ensure_media_types(options[:media_types] || [:all]) Crass.parse(block).each do |node| case node @@ -189,7 +232,7 @@ def add_block!(block, options = {}) add_rule!(**add_rule_options) in node: :at_rule, name: 'media' - new_media_queries = split_media_query_by_or_condition(node[:prelude]) + new_media_queries = ParserFx.split_media_query_by_or_condition(node[:prelude]) add_block!(node[:block], options.merge(media_types: new_media_queries)) in node: :at_rule, name: 'page' @@ -244,7 +287,7 @@ def add_block!(block, options = {}) media_query_section = [] loop { media_query_section << prelude.next } - import_options[:media_types] = split_media_query_by_or_condition(media_query_section) + import_options[:media_types] = ParserFx.split_media_query_by_or_condition(media_query_section) if import_options[:media_types].empty? import_options[:media_types] = [:all] end @@ -271,7 +314,7 @@ def add_block!(block, options = {}) # and +media_types+. Optional pass +filename+ , +offset+ for source # reference too. # - # +media_types+ can be a symbol or an array of symbols. default to :all + # +media_types+ can be a symbol or an array of media queries (:all or string). default to :all # optional fields for source location for source location # +filename+ can be a string or uri pointing to the file or url location. # +offset+ should be Range object representing the start and end byte locations where the rule was found in the file. @@ -288,43 +331,28 @@ def add_rule!(selectors: nil, block: nil, filename: nil, offset: nil, media_type # Add a CssParser RuleSet object. # - # +media_types+ can be a symbol or an array of symbols. + # +media_types+ can be a symbol or an media queries (:all or string). 
def add_rule_set!(ruleset, media_types = :all) raise ArgumentError unless ruleset.is_a?(CssParser::RuleSet) - media_types = [media_types] unless media_types.is_a?(Array) - media_types = media_types.flat_map { |mt| CssParser.sanitize_media_query(mt) } + media_types = Util.ensure_media_types(media_types) @rules << {media_types: media_types, rules: ruleset} end # Remove a CssParser RuleSet object. # - # +media_types+ can be a symbol or an array of symbols. + # +media_types+ can be a symbol or an media queries (:all or string). def remove_rule_set!(ruleset, media_types = :all) raise ArgumentError unless ruleset.is_a?(CssParser::RuleSet) - media_types = [media_types].flatten.collect { |mt| CssParser.sanitize_media_query(mt) } + media_types = Util.ensure_media_types(media_types) @rules.reject! do |rule| rule[:media_types] == media_types && rule[:rules].to_s == ruleset.to_s end end - # Iterate through RuleSet objects. - # - # +media_types+ can be a symbol or an array of symbols. - def each_rule_set(media_types = :all) # :yields: rule_set, media_types - media_types = [:all] if media_types.nil? - media_types = [media_types].flatten.collect { |mt| CssParser.sanitize_media_query(mt) } - - @rules.each do |block| - if media_types.include?(:all) or block[:media_types].any? { |mt| media_types.include?(mt) } - yield(block[:rules], block[:media_types]) - end - end - end - # Output all CSS rules as a Hash def to_h(which_media = :all) out = {} @@ -346,20 +374,6 @@ def to_h(which_media = :all) out end - # Iterate through CSS selectors. - # - # +media_types+ can be a symbol or an array of symbols. - # See RuleSet#each_selector for +options+. - def each_selector(all_media_types = :all, options = {}) # :yields: selectors, declarations, specificity, media_types - return to_enum(__method__, all_media_types, options) unless block_given? 
- - each_rule_set(all_media_types) do |rule_set, media_types| - rule_set.each_selector(options) do |selectors, declarations, specificity| - yield selectors, declarations, specificity, media_types - end - end - end - # Output all CSS rules as a single stylesheet. def to_s(which_media = :all) out = [] @@ -391,241 +405,8 @@ def to_s(which_media = :all) out.join("\n") end - # A hash of { :media_query => rule_sets } - def rules_by_media_query - rules_by_media = {} - @rules.each do |block| - block[:media_types].each do |mt| - unless rules_by_media.key?(mt) - rules_by_media[mt] = [] - end - rules_by_media[mt] << block[:rules] - end - end - - rules_by_media - end - - # Merge declarations with the same selector. - def compact! # :nodoc: - [] - end - - # Load a remote CSS file. - # - # You can also pass in file://test.css - # - # See add_block! for options. - # - # Deprecated: originally accepted three params: `uri`, `base_uri` and `media_types` - def load_uri!(uri, options = {}, deprecated = nil) - uri = Addressable::URI.parse(uri) unless uri.respond_to? :scheme - - opts = {base_uri: nil, media_types: :all} - - if options.is_a? Hash - opts.merge!(options) - else - opts[:base_uri] = options if options.is_a? String - opts[:media_types] = deprecated if deprecated - end - - if uri.scheme == 'file' or uri.scheme.nil? - uri.path = File.expand_path(uri.path) - uri.scheme = 'file' - end - - opts[:base_uri] = uri if opts[:base_uri].nil? - - # pass on the uri if we are capturing file offsets - opts[:filename] = uri.to_s if opts[:capture_offsets] - - src, = read_remote_file(uri) # skip charset - - add_block!(src, opts) if src - end - - # Load a local CSS file. - def load_file!(file_name, options = {}, deprecated = nil) - opts = {base_dir: nil, media_types: :all} - - if options.is_a? Hash - opts.merge!(options) - else - opts[:base_dir] = options if options.is_a? 
String - opts[:media_types] = deprecated if deprecated - end - - file_name = File.expand_path(file_name, opts[:base_dir]) - return unless File.readable?(file_name) - return unless circular_reference_check(file_name) - - src = File.read(file_name) - - opts[:filename] = file_name if opts[:capture_offsets] - opts[:base_dir] = File.dirname(file_name) - - add_block!(src, opts) - end - - # Load a local CSS string. - def load_string!(src, options = {}, deprecated = nil) - opts = {base_dir: nil, media_types: :all} - - if options.is_a? Hash - opts.merge!(options) - else - opts[:base_dir] = options if options.is_a? String - opts[:media_types] = deprecated if deprecated - end - - add_block!(src, opts) - end - - protected - - # Check that a path hasn't been loaded already - # - # Raises a CircularReferenceError exception if io_exceptions are on, - # otherwise returns true/false. - def circular_reference_check(path) - path = path.to_s - if @loaded_uris.include?(path) - raise CircularReferenceError, "can't load #{path} more than once" if @options[:io_exceptions] - - false - else - @loaded_uris << path - true - end - end - - # Download a file into a string. - # - # Returns the file's data and character set in an array. - #-- - # TODO: add option to fail silently or throw and exception on a 404 - #++ - def read_remote_file(uri) # :nodoc: - if @redirect_count.nil? - @redirect_count = 0 - else - @redirect_count += 1 - end - - unless circular_reference_check(uri.to_s) - @redirect_count = nil - return nil, nil - end - - if @redirect_count > MAX_REDIRECTS - @redirect_count = nil - return nil, nil - end - - src = '', charset = nil - - begin - uri = Addressable::URI.parse(uri.to_s) - - if uri.scheme == 'file' - # local file - path = uri.path - path.gsub!(%r{^/}, '') if Gem.win_platform? 
- src = File.read(path, mode: 'rb') - else - # remote file - if uri.scheme == 'https' - uri.port = 443 unless uri.port - http = Net::HTTP.new(uri.host, uri.port) - http.use_ssl = true - http.verify_mode = OpenSSL::SSL::VERIFY_NONE - else - http = Net::HTTP.new(uri.host, uri.port) - end - - res = http.get(uri.request_uri, {'User-Agent' => @options[:user_agent], 'Accept-Encoding' => 'gzip'}) - src = res.body - charset = res.respond_to?(:charset) ? res.encoding : 'utf-8' - - if res.code.to_i >= 400 - @redirect_count = nil - raise RemoteFileError, uri.to_s if @options[:io_exceptions] - - return '', nil - elsif res.code.to_i >= 300 and res.code.to_i < 400 - unless res['Location'].nil? - return read_remote_file Addressable::URI.parse(Addressable::URI.escape(res['Location'])) - end - end - - case res['content-encoding'] - when 'gzip' - io = Zlib::GzipReader.new(StringIO.new(res.body)) - src = io.read - when 'deflate' - io = Zlib::Inflate.new - src = io.inflate(res.body) - end - end - - if charset - if String.method_defined?(:encode) - src.encode!('UTF-8', charset) - else - ic = Iconv.new('UTF-8//IGNORE', charset) - src = ic.iconv(src) - end - end - rescue - @redirect_count = nil - raise RemoteFileError, uri.to_s if @options[:io_exceptions] - - return nil, nil - end - - @redirect_count = nil - [src, charset] - end - private - def split_media_query_by_or_condition(media_query_selector) - media_query_selector - .each_with_object([[]]) do |token, sum| - # comma is the same as or - # https://developer.mozilla.org/en-US/docs/Web/CSS/@media#logical_operators - case token - in node: :comma - sum << [] - in node: :ident, value: 'or' # rubocop:disable Lint/DuplicateBranch - sum << [] - else - sum.last << token - end - end # rubocop:disable Style/MultilineBlockChain - .map { Crass::Parser.stringify(_1).strip } - .reject(&:empty?) - .map(&:to_sym) - end - - # Save a folded declaration block to the internal cache. 
- def save_folded_declaration(block_hash, folded_declaration) # :nodoc: - @folded_declaration_cache[block_hash] = folded_declaration - end - - # Retrieve a folded declaration block from the internal cache. - def get_folded_declaration(block_hash) # :nodoc: - @folded_declaration_cache[block_hash] ||= nil - end - - def reset! # :nodoc: - @folded_declaration_cache = {} - @css_source = '' - @css_rules = [] - @css_warnings = [] - end - # recurse through nested nodes and return them as Hashes nested in # passed hash def css_node_to_h(hash, key, val) diff --git a/lib/css_parser/file_resource.rb b/lib/css_parser/file_resource.rb new file mode 100644 index 0000000..e54fcdb --- /dev/null +++ b/lib/css_parser/file_resource.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +module CssParser + class FileResource + # Exception class used if a request is made to load a CSS file more than once. + class CircularReferenceError < StandardError; end + + def initialize(io_exceptions:) + @io_exceptions = io_exceptions + + @loaded_files = [] + end + + # Check that a path hasn't been loaded already + # + # Raises a CircularReferenceError exception if io_exceptions are on, + # otherwise returns true/false. 
+ def circular_reference_check(path) + path = path.to_s + if @loaded_files.include?(path) + raise CircularReferenceError, "can't load #{path} more than once" if @io_exceptions + + false + else + @loaded_files << path + true + end + end + + def find_file(file_name, base_dir:) + path = File.expand_path(file_name, base_dir) + return unless File.readable?(path) + return unless circular_reference_check(path) + + path + end + end +end diff --git a/lib/css_parser/http_read_url.rb b/lib/css_parser/http_read_url.rb new file mode 100644 index 0000000..e990607 --- /dev/null +++ b/lib/css_parser/http_read_url.rb @@ -0,0 +1,126 @@ +# frozen_string_literal: true + +module CssParser + class HTTPReadURL + MAX_REDIRECTS = 3 + + # Exception class used if a request is made to load a CSS file more than once. + class CircularReferenceError < StandardError; end + + # Exception class used for any errors encountered while downloading remote files. + class RemoteFileError < IOError; end + + def initialize(agent:, io_exceptions:) + @agent = agent + @io_exceptions = io_exceptions + + @redirect_count = nil + @loaded_uris = [] + end + + # Check that a path hasn't been loaded already + # + # Raises a CircularReferenceError exception if io_exceptions are on, + # otherwise returns true/false. + def circular_reference_check(path) + path = path.to_s + if @loaded_uris.include?(path) + raise CircularReferenceError, "can't load #{path} more than once" if @io_exceptions + + false + else + @loaded_uris << path + true + end + end + + # Download a file into a string. + # + # Returns the file's data and character set in an array. + #-- + # TODO: add option to fail silently or throw and exception on a 404 + #++ + def read_remote_file(uri) # :nodoc: + if @redirect_count.nil? 
+ @redirect_count = 0 + else + @redirect_count += 1 + end + + # TODO: has to be done on the outside + unless circular_reference_check(uri.to_s) + @redirect_count = nil + return nil, nil + end + + if @redirect_count > MAX_REDIRECTS + @redirect_count = nil + return nil, nil + end + + src = '', charset = nil + + begin + uri = Addressable::URI.parse(uri.to_s) + + if uri.scheme == 'file' + # local file + path = uri.path + path.gsub!(%r{^/}, '') if Gem.win_platform? + src = File.read(path, mode: 'rb') + else + # remote file + if uri.scheme == 'https' + uri.port = 443 unless uri.port + http = Net::HTTP.new(uri.host, uri.port) + http.use_ssl = true + http.verify_mode = OpenSSL::SSL::VERIFY_NONE + else + http = Net::HTTP.new(uri.host, uri.port) + end + + res = http.get(uri.request_uri, {'User-Agent' => @agent, 'Accept-Encoding' => 'gzip'}) + src = res.body + charset = res.respond_to?(:charset) ? res.encoding : 'utf-8' + + if res.code.to_i >= 400 + @redirect_count = nil + raise RemoteFileError, uri.to_s if @io_exceptions + + return '', nil + elsif res.code.to_i >= 300 and res.code.to_i < 400 + unless res['Location'].nil? 
+ return read_remote_file(Addressable::URI.parse(Addressable::URI.escape(res['Location']))) + end + end + + case res['content-encoding'] + when 'gzip' + io = Zlib::GzipReader.new(StringIO.new(res.body)) + src = io.read + when 'deflate' + io = Zlib::Inflate.new + src = io.inflate(res.body) + end + end + + if charset + if String.method_defined?(:encode) + src.encode!('UTF-8', charset) + else + ic = Iconv.new('UTF-8//IGNORE', charset) + src = ic.iconv(src) + end + end + rescue + @redirect_count = nil + raise RemoteFileError, uri.to_s if @io_exceptions + + return nil, nil + end + + @redirect_count = nil + [src, charset] + end + end +end diff --git a/lib/css_parser/parser_fx.rb b/lib/css_parser/parser_fx.rb new file mode 100644 index 0000000..965c0a6 --- /dev/null +++ b/lib/css_parser/parser_fx.rb @@ -0,0 +1,63 @@ +# frozen_string_literal: true + +module CssParser + # We have a Document class which you create an instance of, but we have some + # functions which are nice to have outside of that instance + # + # Intended as private helpers for lib. Breaking changes may happen with no warning + module ParserFx + # Receives properties from a style_rule node from crass. 
+ def self.create_declaration_from_properties(properties) + declarations = RuleSet::Declarations.new + + properties.each do |child| + case child + in node: :property, value: '' # nothing, happen for { color:green; color: } + in node: :property + declarations.add_declaration!( + child[:name], + RuleSet::Declarations::Value.new(child[:value], important: child[:important]) + ) + in node: :whitespace # nothing + in node: :semicolon # nothing + in node: :error # nothing + end + end + + declarations + end + + # it is expecting the selector tokens from node: :style_rule, not just + # from Crass::Tokenizer.tokenize(input) + def self.split_selectors(tokens) + tokens + .each_with_object([[]]) do |token, sum| + case token + in node: :comma + sum << [] + else + sum.last << token + end + end + end + + # expect tokens from crass + def self.split_media_query_by_or_condition(media_query_selector) + media_query_selector + .each_with_object([[]]) do |token, sum| + # comma is the same as or + # https://developer.mozilla.org/en-US/docs/Web/CSS/@media#logical_operators + case token + in node: :comma + sum << [] + in node: :ident, value: 'or' # rubocop:disable Lint/DuplicateBranch + sum << [] + else + sum.last << token + end + end # rubocop:disable Style/MultilineBlockChain + .map { Crass::Parser.stringify(_1).strip } + .reject(&:empty?) 
+ end + end +end diff --git a/test/test_css_parser_basic.rb b/test/test_css_parser_basic.rb index 5efcf83..e13138e 100644 --- a/test/test_css_parser_basic.rb +++ b/test/test_css_parser_basic.rb @@ -7,7 +7,7 @@ class CssParserBasicTests < Minitest::Test include CssParser def setup - @cp = CssParser::Parser.new + @cp = Document.new @css = <<-CSS html, body, p { margin: 0px; } p { padding: 0px; } @@ -55,7 +55,7 @@ def test_removing_a_rule_set def test_toggling_uri_conversion # with conversion - cp_with_conversion = Parser.new(absolute_paths: true) + cp_with_conversion = Document.new(absolute_paths: true) cp_with_conversion.add_block!("body { background: url('../style/yellow.png?abc=123') };", base_uri: 'http://example.org/style/basic.css') @@ -63,7 +63,7 @@ def test_toggling_uri_conversion cp_with_conversion['body'].join(' ') # without conversion - cp_without_conversion = Parser.new(absolute_paths: false) + cp_without_conversion = Document.new(absolute_paths: false) cp_without_conversion.add_block!("body { background: url('../style/yellow.png?abc=123') };", base_uri: 'http://example.org/style/basic.css') @@ -72,7 +72,7 @@ def test_toggling_uri_conversion end def test_converting_to_hash - rs = CssParser::RuleSet.new(selectors: 'div', block: 'color: blue;') + rs = RuleSet.new(selectors: 'div', block: 'color: blue;') @cp.add_rule_set!(rs) hash = @cp.to_h assert_equal 'blue', hash['all']['div']['color'] diff --git a/test/test_css_parser_loading.rb b/test/test_css_parser_loading.rb index 96f1a67..fbb5dea 100644 --- a/test/test_css_parser_loading.rb +++ b/test/test_css_parser_loading.rb @@ -7,7 +7,7 @@ class CssParserLoadingTests < Minitest::Test include CssParser def setup - @cp = Parser.new + @cp = Document.new @uri_base = 'http://localhost:12000' end @@ -77,7 +77,7 @@ def test_loading_a_string def test_following_at_import_rules_local base_dir = File.expand_path('fixtures', __dir__) - @cp.load_file!('import1.css', base_dir) + @cp.load_file!('import1.css', base_dir: 
base_dir) # from '/import1.css' assert_equal 'color: lime;', @cp.find_by_selector('div').join(' ') @@ -109,7 +109,7 @@ def test_following_at_import_rules_remote def test_imports_disabled stub_request_file("import1.css") - cp = Parser.new(import: false) + cp = Document.new(import: false) cp.load_uri!("#{@uri_base}/import1.css") # from '/import1.css' @@ -128,7 +128,7 @@ def test_following_remote_import_rules css_block = '@import "http://example.com/css";' - assert_raises CssParser::RemoteFileError do + assert_raises HTTPReadURL::RemoteFileError do @cp.add_block!(css_block, base_uri: "#{@uri_base}/subdir/") end end @@ -139,7 +139,7 @@ def test_following_badly_escaped_import_rules css_block = '@import "http://example.com/css?family=Droid+Sans:regular,bold|Droid+Serif:regular,italic,bold,bolditalic&subset=latin";' - assert_raises CssParser::RemoteFileError do + assert_raises HTTPReadURL::RemoteFileError do @cp.add_block!(css_block, base_uri: "#{@uri_base}/subdir/") end end @@ -180,13 +180,13 @@ def test_importing_with_media_types @cp.load_uri!("#{@uri_base}/import-with-media-types.css") - # from simple.css with :screen media type - assert_equal 'margin: 0px;', @cp.find_by_selector('p', :screen).join(' ') - assert_equal '', @cp.find_by_selector('p', :tty).join(' ') + # from simple.css with screen media type + assert_equal 'margin: 0px;', @cp.find_by_selector('p', "screen").join(' ') + assert_equal '', @cp.find_by_selector('p', "tty").join(' ') end def test_local_circular_reference_exception - assert_raises CircularReferenceError do + assert_raises FileResource::CircularReferenceError do @cp.load_file!(File.expand_path('fixtures/import-circular-reference.css', __dir__)) end end @@ -194,7 +194,7 @@ def test_local_circular_reference_exception def test_remote_circular_reference_exception stub_request_file("import-circular-reference.css") - assert_raises CircularReferenceError do + assert_raises HTTPReadURL::CircularReferenceError do 
@cp.load_uri!("#{@uri_base}/import-circular-reference.css") end end @@ -202,7 +202,7 @@ def test_remote_circular_reference_exception def test_suppressing_circular_reference_exceptions stub_request_file("import-circular-reference.css") - cp_without_exceptions = Parser.new(io_exceptions: false) + cp_without_exceptions = Document.new(io_exceptions: false) cp_without_exceptions.load_uri!("#{@uri_base}/import-circular-reference.css") end @@ -211,15 +211,15 @@ def test_toggling_not_found_exceptions stub_request(:get, "http://localhost:12000/no-exist.xyz") .to_return(status: 404, body: "", headers: {}) - cp_with_exceptions = Parser.new(io_exceptions: true) + cp_with_exceptions = Document.new(io_exceptions: true) - err = assert_raises RemoteFileError do + err = assert_raises HTTPReadURL::RemoteFileError do cp_with_exceptions.load_uri!("#{@uri_base}/no-exist.xyz") end assert_includes err.message, "#{@uri_base}/no-exist.xyz" - cp_without_exceptions = Parser.new(io_exceptions: false) + cp_without_exceptions = Document.new(io_exceptions: false) cp_without_exceptions.load_uri!("#{@uri_base}/no-exist.xyz") end diff --git a/test/test_css_parser_media_types.rb b/test/test_css_parser_media_types.rb index 87e00e9..f705def 100644 --- a/test/test_css_parser_media_types.rb +++ b/test/test_css_parser_media_types.rb @@ -7,7 +7,7 @@ class CssParserMediaTypesTests < Minitest::Test include CssParser def setup - @cp = Parser.new + @cp = Document.new end def test_that_media_types_dont_include_all @@ -41,9 +41,9 @@ def test_finding_by_media_type } CSS - assert_equal 'font-size: 10pt; line-height: 1.2;', @cp.find_by_selector('body', :print).join(' ') - assert_equal 'font-size: 13px; line-height: 1.2; color: blue;', @cp.find_by_selector('body', :screen).join(' ') - assert_equal 'color: blue;', @cp.find_by_selector('body', :'print and resolution > 90dpi').join(' ') + assert_equal 'font-size: 10pt; line-height: 1.2;', @cp.find_by_selector('body', "print").join(' ') + assert_equal 'font-size: 13px; 
line-height: 1.2; color: blue;', @cp.find_by_selector('body', "screen").join(' ') + assert_equal 'color: blue;', @cp.find_by_selector('body', 'print and resolution > 90dpi').join(' ') end def test_with_parenthesized_media_features @@ -59,10 +59,10 @@ def test_with_parenthesized_media_features body { color: red } } CSS - assert_equal [:all, :'(prefers-color-scheme: dark)', :'(min-width: 500px)', :'screen and (width > 500px)'], @cp.rules_by_media_query.keys - assert_equal 'color: white;', @cp.find_by_selector('body', :'(prefers-color-scheme: dark)').join(' ') - assert_equal 'color: blue;', @cp.find_by_selector('body', :'(min-width: 500px)').join(' ') - assert_equal 'color: red;', @cp.find_by_selector('body', :'screen and (width > 500px)').join(' ') + assert_equal [:all, '(prefers-color-scheme: dark)', '(min-width: 500px)', 'screen and (width > 500px)'], @cp.rules_by_media_query.keys + assert_equal 'color: white;', @cp.find_by_selector('body', '(prefers-color-scheme: dark)').join(' ') + assert_equal 'color: blue;', @cp.find_by_selector('body', '(min-width: 500px)').join(' ') + assert_equal 'color: red;', @cp.find_by_selector('body', 'screen and (width > 500px)').join(' ') end def test_finding_by_multiple_media_types @@ -78,16 +78,16 @@ def test_finding_by_multiple_media_types } CSS - assert_equal 'font-size: 13px; line-height: 1.2;', @cp.find_by_selector('body', [:screen, :handheld]).join(' ') + assert_equal 'font-size: 13px; line-height: 1.2;', @cp.find_by_selector('body', ["screen", "handheld"]).join(' ') end def test_adding_block_with_media_types - @cp.add_block!(<<-CSS, media_types: [:screen]) + @cp.add_block!(<<-CSS, media_types: ["screen"]) body { font-size: 10pt } CSS - assert_equal 'font-size: 10pt;', @cp.find_by_selector('body', :screen).join(' ') - assert @cp.find_by_selector('body', :handheld).empty? + assert_equal 'font-size: 10pt;', @cp.find_by_selector('body', "screen").join(' ') + assert @cp.find_by_selector('body', "handheld").empty? 
end def test_adding_block_with_media_types_followed_by_general_rule @@ -109,7 +109,7 @@ def test_adding_block_and_limiting_media_types1 base_dir = Pathname.new(__dir__).join('fixtures') - @cp.add_block!(css, only_media_types: :screen, base_dir: base_dir) + @cp.add_block!(css, only_media_types: "screen", base_dir: base_dir) assert @cp.find_by_selector('div').empty? end @@ -130,14 +130,14 @@ def test_adding_block_and_limiting_media_types CSS base_dir = Pathname.new(__dir__).join('fixtures') - @cp.add_block!(css, only_media_types: :print, base_dir: base_dir) + @cp.add_block!(css, only_media_types: "print", base_dir: base_dir) assert_equal '', @cp.find_by_selector('div').join(' ') end def test_adding_rule_set_with_media_type - @cp.add_rule!(selectors: 'body', block: 'color: black;', media_types: [:handheld, :tty]) - @cp.add_rule!(selectors: 'body', block: 'color: blue;', media_types: :screen) - assert_equal 'color: black;', @cp.find_by_selector('body', :handheld).join(' ') + @cp.add_rule!(selectors: 'body', block: 'color: black;', media_types: ["handheld", "tty"]) + @cp.add_rule!(selectors: 'body', block: 'color: blue;', media_types: "screen") + assert_equal 'color: black;', @cp.find_by_selector('body', "handheld").join(' ') end def test_adding_rule_set_with_media_query @@ -147,7 +147,7 @@ def test_adding_rule_set_with_media_query end def test_selecting_with_all_media_types - @cp.add_rule!(selectors: 'body', block: 'color: black;', media_types: [:handheld, :tty]) + @cp.add_rule!(selectors: 'body', block: 'color: black;', media_types: ["handheld", "tty"]) assert_equal 'color: black;', @cp.find_by_selector('body', :all).join(' ') end diff --git a/test/test_css_parser_misc.rb b/test/test_css_parser_misc.rb index 4c286fb..f93bd25 100644 --- a/test/test_css_parser_misc.rb +++ b/test/test_css_parser_misc.rb @@ -7,7 +7,7 @@ class CssParserTests < Minitest::Test include CssParser def setup - @cp = Parser.new + @cp = Document.new end def test_utf8 diff --git 
a/test/test_css_parser_offset_capture.rb b/test/test_css_parser_offset_capture.rb index fb0bc3e..3f3ce76 100644 --- a/test/test_css_parser_offset_capture.rb +++ b/test/test_css_parser_offset_capture.rb @@ -7,7 +7,7 @@ class CssParserOffsetCaptureTests < Minitest::Test include CssParser def setup - @cp = Parser.new + @cp = Document.new end def test_capturing_offsets_for_local_file diff --git a/test/test_merging.rb b/test/test_merging.rb index 360f2ae..48f237b 100644 --- a/test/test_merging.rb +++ b/test/test_merging.rb @@ -6,7 +6,7 @@ class MergingTests < Minitest::Test include CssParser def setup - @cp = CssParser::Parser.new + @cp = Document.new end def test_simple_merge @@ -32,7 +32,7 @@ def test_merging_with_compound_selectors rules = @cp.find_rule_sets(["body", "h2"]) assert_equal "margin: 5px;", CssParser.merge(rules).declarations_to_s - @cp = CssParser::Parser.new + @cp = Document.new @cp.add_block! "body { margin: 0; }" @cp.add_block! "h2,h1 { margin: 5px; }" diff --git a/test/test_rule_set.rb b/test/test_rule_set.rb index 72254c9..1e998c8 100644 --- a/test/test_rule_set.rb +++ b/test/test_rule_set.rb @@ -8,7 +8,7 @@ class RuleSetTests < Minitest::Test include CssParser def setup - @cp = Parser.new + @cp = Document.new end def test_setting_property_values diff --git a/test/test_rule_set_creating_shorthand.rb b/test/test_rule_set_creating_shorthand.rb index f9e1e97..710824e 100644 --- a/test/test_rule_set_creating_shorthand.rb +++ b/test/test_rule_set_creating_shorthand.rb @@ -7,7 +7,7 @@ class RuleSetCreatingShorthandTests < Minitest::Test include CssParser def setup - @cp = CssParser::Parser.new + @cp = Document.new end def test_border_width diff --git a/test/test_rule_set_expanding_shorthand.rb b/test/test_rule_set_expanding_shorthand.rb index 6ae9879..f6fbfb0 100644 --- a/test/test_rule_set_expanding_shorthand.rb +++ b/test/test_rule_set_expanding_shorthand.rb @@ -6,7 +6,7 @@ class RuleSetExpandingShorthandTests < Minitest::Test include CssParser def 
setup - @cp = CssParser::Parser.new + @cp = Document.new end # Dimensions shorthand