;*---------------------------------------------------------------------*/
;*    Copyright (c) 1996 by Manuel Serrano. All rights reserved.       */
;*                                                                     */
;*                                     ,--^,                           */
;*                               _ ___/ /|/                            */
;*                           ,;'( )__, ) '                             */
;*                          ;;  //   L__.                              */
;*                          '   \    /  '                              */
;*                               ^   ^                                 */
;*                                                                     */
;*                                                                     */
;*    This program is distributed in the hope that it will be useful.  */
;*    Use and copying of this software and preparation of derivative   */
;*    works based upon this software are permitted, so long as the     */
;*    following conditions are met:                                    */
;*           o credit to the authors is acknowledged following         */
;*             current academic behaviour                              */
;*           o no fees or compensation are charged for use, copies,    */
;*             or access to this software                              */
;*           o this copyright notice is included intact.               */
;*      This software is made available AS IS, and no warranty is made */
;*      about the software or its performance.                         */
;*                                                                     */
;*      Bug descriptions, use reports, comments or suggestions are     */
;*      welcome Send them to                                           */
;*        <Manuel.Serrano@inria.fr>                                    */
;*        Manuel Serrano                                               */
;*        INRIA -- Rocquencourt                                        */
;*        Domaine de Voluceau, BP 105                                  */
;*        78153 Le Chesnay Cedex                                       */
;*        France                                                       */
;*---------------------------------------------------------------------*/


;*=====================================================================*/
;*    serrano/prgm/project/bigloo/cigloo0.1/Parser/lexer.scm           */
;*    -------------------------------------------------------------    */
;*    Author      :  Manuel Serrano                                    */
;*    Creation    :  Fri Nov 24 11:36:25 1995                          */
;*    Last change :  Wed Feb  7 09:49:52 1996 (serrano)                */
;*    -------------------------------------------------------------    */
;*    The C lexer                                                      */
;*=====================================================================*/

;*---------------------------------------------------------------------*/
;*    The module                                                       */
;*---------------------------------------------------------------------*/
(module parser_lexer
   ;; Modules this file depends on.  The lexer below references
   ;; `cpp-parser', `cpp-lexer' and `set-cpp-coord!', none of which is
   ;; defined in this file -- presumably they come from these imports
   ;; (TODO confirm which module provides which name).
   (import  parser_tools
	    parser_cpp)
   ;; Public interface: the `lexer' grammar itself and `define-type-id',
   ;; the one-argument function that marks a name as a typedef name.
   (export  lexer
	    (define-type-id <string>)))

;*---------------------------------------------------------------------*/
;*    define-type-id ...                                               */
;*---------------------------------------------------------------------*/
(define (define-type-id string)
   ;; Record STRING as the name of a user-defined C type by setting the
   ;; `typedef' property of the corresponding symbol to #t.  The
   ;; identifier rule of `lexer' reads this property to emit a TYPE-ID
   ;; token instead of a plain ID.
   (let ((type-name (string->symbol string)))
      (putprop! type-name 'typedef #t)))

;*---------------------------------------------------------------------*/
;*    *keyword-list*                                                   */
;*---------------------------------------------------------------------*/
(define *keyword-list*
   ;; Every word the lexer treats as reserved, kept in alphabetical
   ;; order and laid out one initial letter per row.  Each entry gets
   ;; the `reserved' property at load time (see the initialization
   ;; that follows this definition).
   '("asm" "auto"
     "break"
     "case" "char" "const" "continue"
     "default" "do" "double"
     "else" "entry" "enum" "extern"
     "float" "for" "fortran"
     "goto"
     "if" "int"
     "long"
     "register" "return"
     "short" "signed" "sizeof" "static" "struct" "switch"
     "typedef"
     "union" "unsigned"
     "void" "volatile"
     "while"))

;*---------------------------------------------------------------------*/
;*    The keyword initialization                                       */
;*---------------------------------------------------------------------*/
(let loop ((remaining *keyword-list*))
   ;; Walk the keyword list once, flagging the symbol of each entry
   ;; with the `reserved' property so that the identifier rule of
   ;; `lexer' can recognise it.
   (if (pair? remaining)
       (begin
	  (putprop! (string->symbol (car remaining)) 'reserved #t)
	  (loop (cdr remaining)))))

;*---------------------------------------------------------------------*/
;*    lexer ...                                                        */
;*---------------------------------------------------------------------*/
(define lexer
   ;; Regular grammar turning C source text into tokens.
   ;;
   ;; Token shapes produced by the rules below:
   ;;   (KIND coord)          punctuation, operators and keywords
   ;;   (CONSTANT coord text) integer, floating, character and string
   ;;                         constants (TEXT is the raw matched text)
   ;;   (TYPE-ID coord name)  identifier carrying the `typedef' property
   ;;   (ID coord name)       any other identifier
   ;;   (ERROR coord char)    character no rule accepts
   ;; At end of input the end-of-file object itself is returned.
   ;; Blanks, comments and preprocessor lines yield no token.
   ;;
   ;; COORD is whatever `(the-coord input-port)' returns; `the-coord'
   ;; is not defined in this file.
   (regular-grammar ((letter          (inside #\a #\z #\A #\Z))
                     (digit           (inside #\0 #\9))
                     (nonzero-digit   (inside #\1 #\9))
                     (octal-digit     (inside #\0 #\7))
                     (hex-digit       (inside #\0 #\9 #\a #\f #\A #\F))
                     (long-suffix     (or #\l #\L))
                     (unsigned-suffix (or #\u #\U)))

      ;; blanks (space, newline, tab and the character written #a012)
      ((+ (in #\space #\newline #\tab #a012))
       (ignore))

      ;; C comment: runs up to the first "*/"
      (("/*" (* (or (out #\*) ((+ #\*) (out #\/ #\*)))) (+ #\*) "/")
       (ignore))

      ;; cpp rules: a `#' at beginning of line, extended over
      ;; backslash-newline continuations and embedded comments.  The
      ;; matched text is handed to the cpp parser through a string port.
      ((bol (#\# (* (or (all)
                        (#\\ #\Newline)
                        ("/*" (* (or (out #\*) ((+ #\*) (out #\/ #\*))))
                              (+ #\*) "/")))))
       (let ((coord  (the-coord input-port))
             (string (the-string)))
          ;; the coordinate is published while the directive is parsed
          ;; and reset to #f afterwards
          (set-cpp-coord! coord)
          (try (read/lalrp cpp-parser cpp-lexer (open-input-string string))
               (lambda (escape proc mes obj)
                  (error "cyloo" "cpp parser" (list 'cpp coord mes))))
          (set-cpp-coord! #f)
          (ignore)))

      ;; comma
      ((#\,)
       (list 'COMMA (the-coord input-port)))

      ;; semicolon (the token keeps its historical name SEMI-COMMA)
      ((#\;)
       (list 'SEMI-COMMA (the-coord input-port)))

      ;; dot
      ((#\.)
       (list 'DOT (the-coord input-port)))

      ;; curly braces
      ((#\{)
       (list 'BRA-OPEN (the-coord input-port)))
      ((#\})
       (list 'BRA-CLO (the-coord input-port)))

      ;; square brackets (the tokens keep their historical ANGLE- names)
      ((#\[)
       (list 'ANGLE-OPEN (the-coord input-port)))
      ((#\])
       (list 'ANGLE-CLO (the-coord input-port)))

      ;; parentheses
      ((#\()
       (list 'PAR-OPEN (the-coord input-port)))
      ((#\))
       (list 'PAR-CLO (the-coord input-port)))

      ;; ellipsis
      (("...")
       (list 'LDOTS (the-coord input-port)))

      ;; integer constant: decimal, octal or hexadecimal, with an
      ;; optional l/u suffix combination
      (((or (nonzero-digit (* digit))
            (#\0 (* octal-digit))
            ((or "0x" "0X") (+ hex-digit)))
        (? (or long-suffix
               (long-suffix unsigned-suffix)
               unsigned-suffix
               (unsigned-suffix long-suffix))))
       (list 'CONSTANT (the-coord input-port) (the-string)))

      ;; floating-point constant: either digits with a mandatory
      ;; exponent, or a form containing a dot with an optional exponent
      ((or ((+ digit)
            ((in #\e #\E) (? (in #\- #\+)) (+ digit))
            (? (in #\f #\F #\l #\L)))
           ((or ((+ digit) #\. (* digit)) (#\. (+ digit)))
            (? ((in #\e #\E) (? (in #\- #\+)) (+ digit)))
            (? (in #\f #\F #\l #\L))))
       (list 'CONSTANT (the-coord input-port) (the-string)))

      ;; character constant.  The body is one or more items, each either
      ;; a plain character (anything but quote, backslash or newline) or
      ;; a backslash followed by any character.  Matching "any character"
      ;; here would, under longest-match, swallow everything up to the
      ;; last quote of the line and merge `'a', 'b'' into one constant.
      (((? #\L) (#\' (+ (or (out #\' #\\ #\newline) (#\\ (all)))) #\'))
       (list 'CONSTANT (the-coord input-port) (the-string)))

      ;; string constant.  A backslash always consumes the following
      ;; character (newline included), so an escaped double quote does
      ;; not terminate the string.
      (((? #\L) #\" (* (or (out #\" #\\) (#\\ (or (all) #\newline)))) #\")
       (list 'CONSTANT (the-coord input-port) (the-string)))

      ;; single-character operators: the token kind is the matched text
      ;; turned into a symbol
      ((in #\* #\+ #\- #\/ #\% #\& #\~ #\! #\= #\< #\> #\? #\| #\^ #\:)
       (list (the-symbol) (the-coord input-port)))

      ;; multi-character operators
      ((or "||" "&&" "<<" ">>" "<=" ">=" "==" "!=" "->" "++" "--" "+="
           "-=" "*=" "/=" "%=" "<<=" ">>=" "&=" "^=" "|=")
       (list (the-symbol) (the-coord input-port)))

      ;; identifier, keyword or typedef name
      (((or #\_ letter) (* (or #\_ letter digit)))
       (let* ((string (the-string))
              (symbol (string->symbol string)))
          (cond
             ((getprop symbol 'reserved)
              ;; keywords are reported as their upper-cased symbol; it
              ;; is only built on this branch, the sole place it is used
              (list (string->symbol (string-upcase string))
                    (the-coord input-port)))
             ((getprop symbol 'typedef)
              ;; see the `declaration' rule in the grammar to
              ;; discover where the `typedef' property is set.
              (list 'TYPE-ID (the-coord input-port) string))
             (else
              (list 'ID (the-coord input-port) string)))))

      ;; error: no rule matched.  End of input is passed through as the
      ;; eof object; anything else is reported as an ERROR token.
      (else
       (let ((c (the-failing-char)))
          (if (eof-object? c)
              c
              (list 'ERROR
                    (the-coord input-port)
                    c))))))

