class ChefZero::Solr::SolrParser

Constants

DEFAULT_FIELD

Public Class Methods

new(query_string) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 11
# Creates a parser over a single query string.
#
# query_string:: the raw query text that the tokenizer methods will scan.
def initialize(query_string)
  @query_string = query_string
  # Offset of the next unread character in @query_string.
  @index = 0
  # One-token lookahead buffer used by peek_token / next_token.  It is set
  # here so that eof? never reads an instance variable that was never
  # assigned (which warns on older Rubies when run with -w).
  @next_token = nil
end

Public Instance Methods

binary_operator?(token) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 183
# Tells whether +token+ is one of the infix operators: 'AND', 'OR', '^'
# or ':'.
#
# token:: a token string (or nil).
# Returns true or false.
def binary_operator?(token)
  %w[AND OR ^ :].any? { |operator| operator == token }
end
binary_operator_precedence(token) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 187
# Looks up how tightly a binary operator binds; a larger number binds
# tighter.
#
# token:: the operator string.
# Returns 4 for '^', 3 for ':', 2 for 'AND', 1 for 'OR', and nil for
# anything else.
def binary_operator_precedence(token)
  ranking = { 'OR' => 1, 'AND' => 2, ':' => 3, '^' => 4 }
  ranking[token]
end
eof?() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 82
# True once there is nothing left to read: no token is waiting in the
# lookahead buffer and the scan offset has reached (or passed) the end of
# the query string.
def eof?
  # A buffered token still has to be consumed, so we are not done yet.
  return false if @next_token

  @index >= @query_string.length
end
next_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 27
# Consumes and returns the next token.  The token is obtained through
# peek_token and the lookahead buffer is then cleared, so the following
# peek reads a fresh token.
def next_token
  peek_token.tap { @next_token = nil }
end
parse() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 16
# Entry point: parses the query string and returns whatever
# read_expression produces for it.
def parse
  tree = read_expression
  tree
end
parse_error(token, str) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 116
# Aborts parsing by raising a RuntimeError whose message names the
# offending token, the current scan offset and the full query string.
#
# token:: the token being reported (may be nil).
# str::   description of what went wrong.
def parse_error(token, str)
  message = "Error on token '#{token}' at #{@index} of '#{@query_string}': #{str}"
  raise RuntimeError, message
end
parse_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 33
# Scans one token starting at @index and advances @index past it.
#
# Returns the operator string when an operator starts at the current
# position, otherwise the run of characters forming a term, or nil when
# only whitespace (or nothing) is left.
def parse_token
  skip_whitespace
  return nil if eof?

  operator = peek_operator_token
  if operator
    @index += operator.length
    return operator
  end

  # Not an operator, so a term starts here.  It always takes at least one
  # character and keeps going while peek_term_token allows it.
  term_start = @index
  loop do
    # A backslash escapes the character after it: step over the backslash
    # so the escaped character is taken unconditionally below.
    @index += 1 if @query_string[@index] == '\\'
    @index += 1 unless eof?
    break if eof? || !peek_term_token
  end
  @query_string[term_start...@index]
end
peek_operator_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 70
# Looks at the characters at @index (without consuming them) and returns
# the operator found there: one of the single-character operators, or the
# two-character '&&' / '||'.  Returns nil when no operator starts here.
def peek_operator_token
  single = @query_string[@index]
  single_char_operators = ['"', '+', '-', '!', '(', ')', '{', '}', '[', ']', '^', ':']
  return single if single_char_operators.include?(single)

  pair = @query_string[@index, 2]
  pair if ['&&', '||'].include?(pair)
end
peek_term_token() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 64
# Decides whether the character at @index can continue the term that is
# currently being scanned.
#
# Returns nil on whitespace, true when no operator starts here or the
# operator is '-' (which is therefore allowed inside a term), and false
# for any other operator.
def peek_term_token
  return nil if @query_string[@index] =~ /\s/

  operator = peek_operator_token
  return true unless operator

  operator == '-'
end
peek_token() click to toggle source

Tokenization

# File lib/chef_zero/solr/solr_parser.rb, line 23
# Returns the upcoming token without consuming it.  The token is read via
# parse_token on first use and kept in @next_token, so repeated peeks
# return the same value until the buffer is cleared.
def peek_token
  @next_token = parse_token unless @next_token
  @next_token
end
read_expression() click to toggle source

Parse tree creation

# File lib/chef_zero/solr/solr_parser.rb, line 87
# Reads a sequence of single expressions joined by binary operators and
# returns the resulting tree.  Reading stops at a ')' (which is left
# unconsumed for the caller) or at end of input.
#
# Two expressions that sit next to each other with no binary operator in
# between are joined with 'OR'.
#
# Precedence is applied by walking down the right-hand spine of the tree
# built so far, for as long as the new operator binds tighter than the
# node being looked at, and attaching the new operand at that depth.
# Descending the whole spine (instead of only checking the root) keeps
# chains with three precedence levels correct:
#   a OR b AND c ^ 2  ->  a OR (b AND (c ^ 2))
def read_expression
  result = read_single_expression
  # Expression is over when we hit a close paren or eof
  # (peek_token has the side effect of skipping whitespace for us, so we
  # really know if we're at eof or not)
  until peek_token == ')' || eof?
    operator = peek_token
    if binary_operator?(operator)
      next_token
    else
      # If 2 terms are next to each other, the default operator is OR
      operator = 'OR'
    end
    next_expression = read_single_expression

    # Collect every node on the right spine that binds more loosely than
    # the new operator; the new operand belongs underneath all of them.
    spine = []
    node = result
    while node.is_a?(Query::BinaryOperator) &&
          binary_operator_precedence(operator) > binary_operator_precedence(node.operator)
      spine << node
      node = node.right
    end

    # a*b+c -> (a*b)+c when the spine is empty; otherwise a+b*c -> a+(b*c)
    result = Query::BinaryOperator.new(node, operator, next_expression)

    # Rebuild the looser ancestors bottom-up around the new subtree.
    spine.reverse_each do |parent|
      result = Query::BinaryOperator.new(parent.left, parent.operator, result)
    end
  end
  result
end
read_single_expression() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 120
# Reads exactly one operand of an expression and returns its node:
#
# * a unary operator followed by its operand  -> Query::UnaryOperator
# * a quoted phrase                           -> Query::Phrase
# * a range ({a TO b}, [a TO b], mixed)       -> Query::RangeQuery
# * a parenthesised sub-expression            -> Query::Subquery
# * a bare term, or term:expression           -> Query::Term or
#                                                Query::BinaryOperator
#
# Every malformed input is reported through parse_error: end of input
# where an operand is expected, an unterminated phrase, an incomplete
# range, a missing ')', a stray closing bracket or a stray binary
# operator.
def read_single_expression
  token = next_token
  # If EOF, we have a problem Houston
  if !token
    parse_error(nil, "Expected expression!")

  # If it's an unary operand, build that
  elsif unary_operator?(token)
    operand = read_single_expression
    # TODO We rely on all unary operators having higher precedence than all
    # binary operators.  Check if this is the case.
    Query::UnaryOperator.new(token, operand)

  # If it's the start of a phrase, read the terms in the phrase
  elsif token == '"'
    # Read terms until close "
    phrase_terms = []
    until (term = next_token) == '"'
      # next_token keeps returning nil at end of input, so without this
      # check an unterminated phrase would never leave the loop.
      parse_error(nil, "Expected closing '\"' to end phrase") if !term
      phrase_terms << Query::Term.new(term)
    end
    Query::Phrase.new(phrase_terms)

  # If it's the start of a range query, build that
  elsif token == '{' || token == '['
    left = next_token
    parse_error(left, "Expected left term in range query") if !left
    to = next_token
    parse_error(to, "Expected TO in range query") if to != "TO"
    right = next_token
    parse_error(right, "Expected right term in range query") if !right
    end_range = next_token
    parse_error(end_range, "Expected end of range ('}' or ']')") if !['}', ']'].include?(end_range)
    # '[' / ']' mark an inclusive bound, '{' / '}' an exclusive one
    Query::RangeQuery.new(left, right, token == '[', end_range == ']')

  elsif token == '('
    subquery = read_expression
    close_paren = next_token
    parse_error(close_paren, "Expected ')'") if close_paren != ')'
    Query::Subquery.new(subquery)

  # If it's the end of a closure, raise an exception
  elsif ['}',']',')'].include?(token)
    parse_error(token, "Unexpected end paren")

  # If it's a binary operator, raise an exception
  elsif binary_operator?(token)
    parse_error(token, "Unexpected binary operator")

  # Otherwise it's a term.
  else
    term = Query::Term.new(token)
    # field:value binds here so the ':' never competes with AND / OR
    if peek_token == ':'
      Query::BinaryOperator.new(term, next_token, read_single_expression)
    else
      term
    end
  end
end
skip_whitespace() click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 57
# Advances @index past any run of whitespace that starts at the current
# position.  Does nothing when the current character is not whitespace or
# when @index is already at or beyond the end of the string.
def skip_whitespace
  # Indexing past the end of the string yields nil, and nil =~ /\s/ is nil,
  # so the loop also terminates cleanly at end of input.
  @index += 1 while @query_string[@index] =~ /\s/
end
unary_operator?(token) click to toggle source
# File lib/chef_zero/solr/solr_parser.rb, line 179
# Tells whether +token+ is one of the prefix operators: 'NOT', '+' or '-'.
#
# token:: a token string (or nil).
# Returns true or false.
def unary_operator?(token)
  %w[NOT + -].any? { |operator| operator == token }
end