require "rexml/parent"
require "rexml/parseexception"

module REXML
	##
	# Represents an XML DOCTYPE declaration; that is, the contents of <!DOCTYPE
	# ... >.  DOCTYPES can be used to declare the DTD of a document, as well as
	# being used to declare entities used in the document.
	class DocType < Parent
		START = "<!DOCTYPE"
		START_RE = /\A\s*#{START}\s/um
		STOP = ">"
		STOP_RE = />/u
		SYSTEM = "SYSTEM"
		PUBLIC = "PUBLIC"
		OPEN_RE = /\A\s*\[/u
		# Group 1 is the text between "<!DOCTYPE" and the first "[" or ">";
		# group 2 is that terminating character.
		PATTERN_RE = /\s*#{START}\s+(.*?)(\[|>)/um

		## name is the name of the doctype
		# external_id is the referenced DTD, if given
		attr_reader :name, :external_id

		##
		# Constructor
		# @param first can be multiple types:
		# * String: used as the doctype name; +parent+ is then NOT a parent
		#   node but the value stored as external_id (nil by default).
		# * DocType: name and external_id are copied from it, and +parent+ is
		#   passed to the superclass constructor.  Children are not copied.
		# * Source: the DOCTYPE declaration is consumed from the source and
		#   parsed; if it opens an internal subset ("["), the entries of the
		#   subset are parsed and added as children.
		# Any other type leaves the object uninitialised (as before).
		# @param parent see +first+.
		# Raises ParseException if the source holds no complete DOCTYPE
		# declaration or the declaration has no name, and RuntimeError if
		# quoted identifiers appear without a SYSTEM/PUBLIC keyword.
		def initialize( first, parent=nil )
			if first.kind_of? String
				super()
				@name = first
				@external_id = parent
			elsif first.kind_of? DocType
				super(parent)
				@name = first.name
				@external_id = first.external_id
			elsif first.kind_of? Source
				super(parent)
				md = first.match( PATTERN_RE, true )
				# match returns nil when nothing matches (e.g. truncated
				# input); fail with a parse error rather than a nil dereference.
				raise ParseException.new("Invalid DOCTYPE declaration", first) if md.nil?
				identity = md[1]
				close = md[2]

				# NOTE(review): this name pattern also accepts "!" and "*",
				# while DocType.parse_stream only accepts \w -- confirm which
				# of the two is intended.
				parts = /^([!\*\w]+)(\s+\w+)?(\s+["'].*?['"])?(\s+['"].*?["'])?/u.match( identity )
				@name = parts.nil? ? nil : parts[1]

				raise ParseException.new("DOCTYPE is missing a name", first) if @name.nil?

				@pub_sys = parts[2].nil? ? nil : parts[2].strip
				@long_name = parts[3].nil? ? nil : parts[3].strip
				@uri = parts[4].nil? ? nil : parts[4].strip

				# Only the SYSTEM and PUBLIC keywords start an external id;
				# any other keyword is ignored ("done, or junk").
				keyword = (@pub_sys == SYSTEM || @pub_sys == PUBLIC) ? @pub_sys : nil
				literals = [@long_name, @uri].compact
				if keyword.nil?
					# Quoted literals without a keyword to attach them to.
					raise "malformed DOCTYPE declaration #{parts[0]}" unless literals.empty?
					@external_id = nil
				else
					@external_id = ([keyword] + literals).join(" ")
				end

				# A declaration closed by ">" has no internal subset.
				parse_entities first unless close == ">"
			end
		end

		# Returns a new DocType built from this one's name and external_id
		# (see the DocType branch of the constructor).
		def clone
			DocType.new self
		end

		# Appends the declaration to +output+ using <<: the indent, then
		# "<!DOCTYPE name [external_id]", then -- if there are children --
		# " [", each child on its own line written at indent+2, and "]",
		# and finally ">".
		# @param output any object accepting <<
		# @param indent indentation level handed to the inherited indent helper
		def write( output, indent=0 )
			indent( output, indent )
			output << START
			output << ' '
			output << @name
			output << " #@external_id" unless @external_id.nil?
			unless @children.empty?
				next_indent = indent + 2
				output << ' ['
				@children.each { |child|
					output << "\n"
					child.write( output, next_indent )
				}
				output << "\n"
				output << "]"
			end
			output << STOP
		end

		# Stream-parsing counterpart of the Source constructor: consumes the
		# DOCTYPE declaration from +source+, reports it through
		# listener.doctype( name, pub_sys, long_name, uri ) and, if an internal
		# subset follows, reports its entries via parse_entities_source.
		# Raises ParseException if no complete declaration can be matched and
		# RuntimeError if the declaration has no name.
		def DocType.parse_stream source, listener
			md = source.match( PATTERN_RE, true )
			raise ParseException.new("Invalid DOCTYPE declaration", source) if md.nil?
			identity = md[1]
			close = md[2]

			parts = /^(\w+)(\s+\w+)?(\s+["'].*?['"])?(\s+['"].*?["'])?/u.match( identity )
			# Group 1 is mandatory, so "no match" and "no name" coincide.
			raise "DOCTYPE is missing a name" if parts.nil?
			name = parts[1]

			pub_sys = parts[2].nil? ? nil : parts[2].strip
			long_name = parts[3].nil? ? nil : parts[3].strip
			uri = parts[4].nil? ? nil : parts[4].strip

			listener.doctype name, pub_sys, long_name, uri
			return if close == ">"
			parse_entities_source source, listener
		end

		# Walks the internal subset of a DOCTYPE.  For every entry up to the
		# closing "]" it yields either the String name of a "%name;" reference
		# (which it consumes itself) or the class responsible for the entry
		# (AttlistDecl, ElementDecl, EntityDecl, NotationDecl, Comment or
		# Instruction); in the latter case the block is expected to consume
		# the entry from +source+.  Finally the closing "]>" is consumed.
		# Raises ParseException when the subset ends prematurely and
		# RuntimeError for an unrecognised entry.
		def DocType.parser source
			entry_re = /\s*(.*?)>/um
			# Peek only (no consume): the entry is consumed by whoever handles it.
			md = source.match( entry_re )
			until md.nil? || md[1].strip == "]"
				entry = md[1]
				case entry
				when /^%/
					reference = source.match(/^\s*%(.*?);/um, true)
					if reference.nil?
						raise ParseException.new( "Invalid parameter entity reference in DOCTYPE declaration", source )
					end
					yield reference[1]
				when AttlistDecl::START_RE
					yield AttlistDecl
				when ElementDecl::START_RE
					yield ElementDecl
				when EntityDecl::START_RE
					yield EntityDecl
				when NotationDecl::START_RE
					yield NotationDecl
				when Comment::START_RE
					yield Comment
				when Instruction::START_RE
					yield Instruction
				else
					raise "illegal entry \"#{entry}\" in DOCTYPE\n(match data was '#{md[0]}'"
				end
				md = source.match( entry_re )
			end
			# Covers both an empty/truncated subset and one that ends mid-way.
			raise ParseException.new( "Invalid end of DOCTYPE declaration \"#{source.buffer}\"", source ) if md.nil?
			source.match(/\s*]\s*>/um, true)
		end

		# Stream variant of parse_entities: forwards "%name;" references to
		# listener.entity and lets each declaration class report itself via
		# its parse_source class method.
		def DocType.parse_entities_source source, listener
			DocType.parser source do |arg|
				if arg.kind_of? String
					listener.entity arg
				else
					arg.parse_source source, listener
				end
			end
		end

		# Only the instance methods below are private; the DocType.* methods
		# above are singleton methods, which "private" never applied to.
		private

		# Parses the internal subset from +src+, adding one child node per
		# declaration and passing "%name;" references to add_entity_sub.
		def parse_entities src
			DocType.parser src do |arg|
				if arg.kind_of? String
					add_entity_sub arg
				else
					self.add( arg.new(src) )
				end
			end
		end

		# Hook for "%name;" references found in the internal subset.
		# Intentionally does nothing at present.
		def add_entity_sub ent
		end
	end

	# We don't really handle any of these since we're not a validating
	# parser, so we can be pretty dumb about them.  All we need to be able
	# to do is spew them back out on a write()

	# Base class for declarations that are kept as raw text.  Subclasses
	# must define a PATTERN_RE constant whose first capture group is the
	# text to keep, and a +pattern+ instance method returning it.
	class Declaration < Child
		# Consumes one declaration matching +pattern+ from +src+ and stores
		# its first capture group.
		# Raises ParseException if the source does not match.
		def initialize src
			super()
			md = src.match( pattern, true )
			raise ParseException.new( "error parsing declaration: no matching pattern", src ) if md.nil?
			@string = md[1]
		end

		# The stored declaration text.
		def to_s
			@string
		end

		# Appends the declaration to +output+, preceded by three spaces per
		# indent level when +indent+ is positive.
		def write( output, indent )
			output << ('   '*indent) if indent > 0
			output << @string
		end

		# Stream variant: consumes one declaration from +source+ and reports
		# its text to +listener+ through the callback named after the
		# receiving class (e.g. AttlistDecl -> listener.attlistdecl).
		# Must be called on a subclass that defines PATTERN_RE.
		# Raises ParseException if the source does not match.
		def Declaration.parse_source source, listener
			# +pattern+ is an instance method and cannot be used here, so the
			# receiver's constant is read directly.
			md = source.match( self::PATTERN_RE, true )
			raise ParseException.new( "error parsing declaration: no matching pattern", source ) if md.nil?
			# Strip the module prefix: "REXML::AttlistDecl" -> "attlistdecl".
			listener.send name.split("::").last.downcase, md[1]
		end
	end
	
	# An <!ATTLIST ...> declaration, stored verbatim by Declaration.
	class AttlistDecl < Declaration
		START = "<!ATTLIST"
		START_RE = /^\s*#{START}/um
		# Group 1 is the whole declaration, including the closing ">".
		PATTERN_RE = /\s*(#{START}.*?>)/um

		# Pattern used by Declaration#initialize to consume the declaration.
		def pattern; PATTERN_RE; end
	end

	# An <!ELEMENT ...> declaration, stored verbatim by Declaration.
	class ElementDecl < Declaration
		START = "<!ELEMENT"
		START_RE = /^\s*#{START}/um
		# Group 1 is the whole declaration INCLUDING the closing ">".
		# Declaration#write emits group 1 as-is, so leaving ">" outside the
		# group would write out an unterminated declaration.
		PATTERN_RE = /^\s*(#{START}.*?>)/um

		# Pattern used by Declaration#initialize to consume the declaration.
		def pattern
			PATTERN_RE
		end
	end

	# An <!ENTITY ...> declaration.  Recognised forms:
	#   <!ENTITY [%] name PUBLIC "..." "...">
	#   <!ENTITY [%] name SYSTEM "..." [NDATA name]>
	#   <!ENTITY name "...">
	#   <!ENTITY % name "...">
	class EntityDecl < Child
		START = "<!ENTITY"
		START_RE = /^\s*#{START}/um
		PUBLIC = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+PUBLIC\s+((["']).*?\3)\s+((["']).*?\5)\s*>/um
		SYSTEM = /^\s*#{START}\s+(?:%\s+)?(\w+)\s+SYSTEM\s+((["']).*?\3)(?:\s+NDATA\s+\w+)?\s*>/um
		PLAIN = /^\s*#{START}\s+(\w+)\s+((["']).*?\3)\s*>/um
		PERCENT = /^\s*#{START}\s+%\s+(\w+)\s+((["']).*?\3)\s*>/um
		# Whole declaration (group 1), used by the stream parser.  Quoted
		# literals are skipped as units so a ">" inside an entity value does
		# not end the match early.
		PATTERN_RE = /^\s*(#{START}(?:[^>"']|"[^"]*"|'[^']*')*>)/um
		# Matches declarations of parameter entities ("<!ENTITY % ...").
		PARAMETER_RE = /\A\s*#{START}\s+%\s/um

		# Consumes one entity declaration from +src+, trying the PUBLIC,
		# SYSTEM, PLAIN and PERCENT forms in that order.
		# Raises ParseException if none of them matches.
		def initialize src
			super()
			md = nil
			# Each form is peeked at first and only consumed once it matches.
			if src.match( PUBLIC )
				md = src.match( PUBLIC, true )
				@middle = "PUBLIC"
				@content = "#{md[2]} #{md[4]}"
			elsif src.match( SYSTEM )
				md = src.match( SYSTEM, true )
				@middle = "SYSTEM"
				@content = md[2]
			elsif src.match( PLAIN )
				md = src.match( PLAIN, true )
				@middle = ""
				@content = md[2]
			elsif src.match( PERCENT )
				md = src.match( PERCENT, true )
				@middle = ""
				@content = md[2]
			end
			raise ParseException.new("failed Entity match", src) if md.nil?
			@name = md[1]
			# Remember the "%" so to_s does not turn a parameter entity into
			# a general one.
			@reference = !(md[0] =~ PARAMETER_RE).nil?
			# NOTE(review): an NDATA clause on a SYSTEM entity is matched but
			# not stored, so it is still lost on output.
		end

		# The declaration as markup, e.g. <!ENTITY name SYSTEM "uri">,
		# including the "%" of parameter entities and the closing ">".
		def to_s
			pieces = ["<!ENTITY"]
			pieces << "%" if @reference
			pieces << @name
			pieces << @middle if @middle.size > 0
			pieces << @content
			pieces.join(" ") + ">"
		end

		# Appends the declaration to +output+, preceded by three spaces per
		# indent level when +indent+ is positive.
		def write( output, indent )
			output << ('   '*indent) if indent > 0
			output << to_s
		end

		# Stream variant: consumes one entity declaration from +source+ and
		# passes its text (runs of identical whitespace characters squeezed)
		# to listener.entitydecl.
		# Raises ParseException if the source does not match.
		def EntityDecl.parse_source source, listener
			md = source.match( PATTERN_RE, true )
			raise ParseException.new("failed Entity match", source) if md.nil?
			thing = md[0].squeeze " \t\n\r"
			# Strip the module prefix: "REXML::EntityDecl" -> "entitydecl".
			listener.send name.split("::").last.downcase, thing
		end
	end

	# A <!NOTATION name PUBLIC|SYSTEM "..."> declaration.
	class NotationDecl < Child
		START = "<!NOTATION"
		START_RE = /^\s*#{START}/um
		# Whole declaration (group 1), used by the stream parser.
		PATTERN_RE = /^\s*(#{START}.*?>)/um
		PUBLIC = /^\s*#{START}\s+(\w[\w-]*)\s+(PUBLIC)\s+((["']).*?\4)\s*>/um
		SYSTEM = /^\s*#{START}\s+(\w[\w-]*)\s+(SYSTEM)\s+((["']).*?\4)\s*>/um

		# Consumes one notation declaration from +src+, trying the PUBLIC
		# form and then the SYSTEM form.
		# Raises ParseException if neither matches.
		# NOTE(review): a PUBLIC notation that carries both a public and a
		# system literal matches neither pattern -- confirm whether that form
		# needs to be supported.
		def initialize src
			super()
			# Each form is peeked at first and only consumed once it matches.
			if src.match( PUBLIC )
				md = src.match( PUBLIC, true )
			elsif src.match( SYSTEM )
				md = src.match( SYSTEM, true )
			else
				raise ParseException.new( "error parsing notation: no matching pattern", src )
			end
			@name = md[1]
			@middle = md[2]
			@rest = md[3]
		end

		# The declaration as markup: <!NOTATION name PUBLIC|SYSTEM "...">
		def to_s
			"<!NOTATION #@name #@middle #@rest>"
		end

		# Appends the declaration to +output+, preceded by three spaces per
		# indent level when +indent+ is positive.
		def write( output, indent )
			output << ('   '*indent) if indent > 0
			output << to_s
		end

		# Stream variant: consumes one notation declaration from +source+
		# and passes its text (runs of identical whitespace characters
		# squeezed) to listener.notationdecl.
		# Raises ParseException if the source does not match.
		def NotationDecl.parse_source source, listener
			md = source.match( PATTERN_RE, true )
			raise ParseException.new( "error parsing notation: no matching pattern", source ) if md.nil?
			thing = md[0].squeeze " \t\n\r"
			# Strip the module prefix: "REXML::NotationDecl" -> "notationdecl".
			listener.send name.split("::").last.downcase, thing
		end
	end
end
