--|
--| Filename        : $Source: /var/cvs/gnuada/gnade/esql/scanner.adb,v $
--| Description     : Scanner for the embedded SQL translator
--| Author          : Michael Erdmann
--| Created On      : 8.12.2000
--| Last Modified By: $Author: me $
--| Last Modified On: $Date: 2001/10/15 20:27:37 $
--| Status          : $State: Exp $
--|
--| Copyright (C) 2000 Michael Erdmann
--|
--| This program is free software; you can redistribute it and/or
--| modify it under the terms of the GNU General Public License
--| as published by the Free Software Foundation; either version 2
--| of the License, or (at your option) any later version.
--|
--| This program/code is distributed in the hope that it will be useful,
--| but WITHOUT ANY WARRANTY; without even the implied warranty of
--| MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
--| GNU General Public License for more details.
--|
--| You should have received a copy of the GNU General Public License along
--| with this program; if not, write to the Free Software Foundation, Inc.,
--| 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
--|
--| As a special exception,  if other files  instantiate  generics from this
--| unit, or you link  this unit with other files  to produce an executable,
--| this  unit  does not  by itself cause  the resulting  executable  to  be
--| covered  by the  GNU  General  Public  License.  This exception does not
--| however invalidate  any other reasons why  the executable file  might be
--| covered by the  GNU Public License.
--|
--| Functional Description
--| ======================
--| This module provides the lexical scanner for the esql translator. Each
--| token read in is copied into a output line if the user decides to accept
--| the copy. If the tokenizer has reached the end of a line, the text
--| stored in the output buffer will be written into the output file.
--| Additionaly the user may insert additional strings into the output
--| line.
--|
--| This module provides additionaly the service to print out messaged
--| according to the gcc style containing line and column number.
--|
--| Restrictions
--| ============
--| None
--|
--| References
--| ==========
--| None
--|

--* Ada
with Ada.Exceptions;                    use Ada.Exceptions;
with Ada.Strings;                       use Ada.Strings;
with Ada.Strings.Unbounded;             use Ada.Strings.Unbounded;
with Ada.Unchecked_Conversion;
with Ada.Text_IO;                       use Ada.Text_IO;
with Ada.Characters.Latin_1;            use Ada.Characters;
with Ada.Strings.Fixed;                 use Ada.Strings.Fixed;
with Ada.Characters.Handling;           use Ada.Characters.Handling;
with Options;                           use Options;

package body Scanner is

   Version : constant String := "$Id: scanner.adb,v 1.16 2001/10/15 20:27:37 me Exp $";

   type File_Reader_Type is record
          File              : File_Type;
          Line              : String(1..512);
          Line_Length       : Natural          := 1;
          Read_Pos          : Natural          := 0;
          Output            : File_Type;
          Accepted_Length   : Natural          := 0;
          Accepted_Line     : String(1..8000);
          Temp_Buffer       : String(1..8000);
          Temp_Pos          : Natural          := 0;
          Current_Character : Character        := ' ';
          Begin_Of_Token    : Natural          := 0;
          Current_Line_Nbr  : Natural          := 0;
          File_Name         : Unbounded_String := Null_Unbounded_String;
          Last_Line         : Boolean          := False;
       end record;

   Space : constant Character := ' ';
   Tab   : constant Character := Latin_1.HT;

   --|
   --| f.procedures accepts the characters read so far
   --| are copied into the output file.
   --|
   procedure Accept_Input(
      f : in out File_Reader ) is
   begin
      for i in 1..f.Temp_Pos loop
         f.Accepted_Length := f.Accepted_Length + 1;
         f.Accepted_Line( f.Accepted_Length ) := f.Temp_Buffer(i);
      end loop;
      -- Put_Line( "<" & F.Accepted_Line( 1..F.Accepted_Length ) &">" );
      f.Temp_Pos := 0;

   end Accept_Input;

   --|
   --| Don't copy the charcter to the output stream.
   --|
   procedure Ignore_Input(
       f : in out File_Reader ) is
   begin
       f.Temp_Pos := 0;
   end Ignore_Input;

   --|
   --| This procedure inserts a string into the
   --| accept buffer.
   --|
   procedure Accept_String(
      f : in out File_Reader;
      s : in String ) is
   begin
      -- Put_Line( "(" & F.Accepted_Line( 1..F.Accepted_Length ) & ")" );
      for i in S'Range loop
         f.Accepted_Length := f.Accepted_Length + 1;
         f.Accepted_Line( f.Accepted_Length ) := S(i);
      end loop;

      -- Put_Line( "<" & F.Accepted_Line( 1..F.Accepted_Length ) &">" );
   end Accept_String;

   --------------
   -- Get_Char --
   --------------

   procedure Get_Char(
       f    : in  File_Reader;
       CH   : out Character ) is
       --
       -- this function reads in the characters from the input
       -- file. Each line which has been read in completly
       -- but character wise processed.
       -- Each character read is first copied into a temporary
       -- buffer which is appended to the output line buffer,
       -- if the application accepts the token.
       -- The output buffer is written out  upon end of the line
       -- which contains only the accepted parts of the input
       -- line.
       --
       R    : Natural renames F.Read_Pos;
       T    : Natural renames F.Temp_Pos;
       L    : Natural renames F.Line_Length;
       ---
    begin
       if    R = 0                -- get_char is called the first time
          or R = L + 1            -- or end of the line is reached
       then
          if not F.Last_Line then
             begin
                f.Current_Line_Nbr := f.Current_Line_Nbr + 1;
                Get_Line( f.File, f.Line, L );
                -- this removes any CR/LF combintation as they are found
                -- with Windows NT.
                for I in 1..L loop
                   if F.Line(I) < ' ' then
                      F.Line(I) := ' ';
                   end if;
                end loop;
                if Option_DEBUG then
                   Put_Line("Processing line " & Integer'Image(F.Current_Line_Nbr));
                end if;
             exception
                when others =>
                   F.Last_Line := True;
             end;
             R  := 1;
             CH := ' ';
          else
             --
             -- If we have reached End_Of_File make sure that
             -- any saved line is written to the output
             --
             if R = 1 and then F.Accepted_Length > 0 then
                 Put_Line( f.Output, F.Accepted_Line(1..f.Accepted_Length));
                 f.Accepted_Length := 0;
                 T := 0;
             end if;
             -- if this was the last line then indicate end of file
             if Option_Debug then
                Put_Line("EOF");
             end if;

             CH := ASCII.EOT;
          end if;
       else
          if R = 1 and then F.Accepted_Length > 0 then
             Put_Line( f.Output, F.Accepted_Line(1..f.Accepted_Length) );
             f.Accepted_Length := 0;
             T := 0;
          end if;

          CH := f.Line( R );
          R  := R + 1;

          T  := T + 1;
          f.Temp_Buffer( T ) := CH;
          -- Put_Line( F.Temp_Buffer(1..T) & "|" );
       end if;
   end Get_Char;

   --|
   --| f.function reads in the characters from the input
   --| file. Each line which has been read in completly
   --| will be printed into the output stream. The trick of
   --| removing the embedded SQL string will be done later
   --|
   function Look_Ahead(
       f    : in  File_Reader) return Character is
       ---
       R    : Natural renames F.Read_Pos;
       T    : Natural renames F.Temp_Pos;
       L    : Natural renames F.Line_Length;
       ---
    begin
       if R = 0 or R = L + 1 then
          if F.Last_Line then
             return ASCII.EOT;
          else
             return ' ';
          end if;
       else
          return f.Line( R );
       end if;
   end Look_Ahead;

   --|
   --| Write out the already accepted code and the remaining part
   --|
   procedure Flush(
      f    : in out File_Reader ) is
      ---
      R    : Natural renames F.Read_Pos;
      T    : Natural renames F.Temp_Pos;
      L    : Natural renames F.Line_Length;
      ---
   begin
      if F.Accepted_Length > 0 then
         Put(f.Output, F.Accepted_Line(1..f.Accepted_Length) );
         f.Accepted_Length := 0;
      end if;
      Put_Line( f.Output, f.Line( R..L ) );
      T := 0;
      R := 0;
   end Flush;

   --|
   --| get a token from the input buffer
   --|
   procedure Get_Token(
      f     : in out File_Reader;
      Token : out Token_Type ) is
      ---
       CH                : Natural := 1;

      -- SAVE CHARACTER ----------------------------------------------------

      procedure Save_Character is
      begin
         if CH < token.Lexicon'Length then
            Token.Lexicon(CH) := f.Current_Character;
            CH                := CH + 1;
         end if;
         Get_Char(f, f.Current_Character );

      end Save_Character;

      -- GET IDENTIFIER ------------------------------------------------------
      --
      -- bug fix: 0502.2 the ' is part of an identifier in order to meet the
      --          lexical rules of SQL.
      procedure Get_Identifier is
      begin
         Token.Lexical_Unit := Identifier_Lex;
         Token.Lexicon      := Blank_Identifier;
         Save_Character;

         -- Add characters to the representation until invalid char found
         loop
            if (f.Current_Character not in 'a'..'z') and
               (f.Current_Character not in 'A'..'Z') and
               (f.Current_Character not in '0'..'9') and
               (f.Current_Character /= '_') and
--               (f.Current_Character /= '.') and
               (F.Current_Character /= ''')
            then
               exit;
            else
               Save_Character;
            end if;
         end loop;

      end Get_Identifier;


      -- GET NUMERIC LITERAL ------------------------------------------

      procedure Get_Numeric_Literal is

         -- GET INTEGER ------------------------------------------------
         procedure Get_Integer is
         begin

            loop
               if   (f.Current_Character in '0'..'9')
                 or (f.Current_Character = '_') then
                  Save_Character;
               else
                  exit;
               end if;
            end loop;

         end Get_Integer;

         -- GET EXPONENT -------------------------------------------------
         procedure Get_Exponent is
         begin
            Save_Character;
            if  (f.Current_Character = '+') or
                (f.Current_Character = '-')
            then
               Save_Character;
            end if;
            Get_Integer;
         end Get_Exponent;


         -- GET EXTENDED INTEGER -----------------------------------
         procedure Get_Extended_Integer is
         begin
            loop
              if (f.Current_Character in '0'..'9') or
                 (f.Current_Character = '_') or
                 (f.Current_Character in 'a'..'f')
              then
                 Save_Character;
              else
                 exit;
              end if;
           end loop;
         end Get_Extended_Integer;


      begin -- GET NUMERIC LITERAL

         Token.Lexical_Unit := Numeric_Literal_Lex;
         Token.Lexicon      := Blank_Identifier;
         Save_Character;

         Get_Integer;

         if f.Current_Character = '.' then
            Save_Character;
            Get_Integer;
            if f.Current_Character = 'E' then
               Save_Character;
               Get_Exponent;
            end if;

         elsif f.Current_Character = '#' then
            Save_Character;
            Get_Extended_Integer;
            if f.Current_Character = '.' then
               Save_Character;
               Get_Extended_Integer;
            end if;
            if f.Current_Character = '#' then
               Save_Character;
               Get_Exponent;
            end if;
         end if;
      end Get_Numeric_Literal;

      -- GET STRING -----------------------------------------------
      procedure Get_Ada_String is
         String_Begin : Integer := F.Current_Line_Nbr;
      begin
         Token.Lexical_Unit := String_Lex;
         Token.Lexicon      := Blank_Identifier;
         Save_Character;

         -- Add chars until """ " or """ or """" found
         loop
            if f.Current_Character = '"' then
               Save_Character;

               if f.Current_Character = '"' then
                  Save_Character;
               else
                  exit;
               end if;
            else
               Save_Character;
            end if;

            if F.Current_Character = ASCII.EOT  then
               Message( F, " error : end of file in string starting at" &
                            Integer'Image(String_Begin) );
               raise Lexical_Error;
            end if;

            if Is_Control( F.Current_Character ) then
               Message( F, " error : control character in string starting at" &
                            Integer'Image(String_Begin) );
               raise Lexical_Error;
            end if;

         end loop;
      end Get_Ada_String;

      --|
      --| This is an SQL Style string
      --|
      procedure Get_SQL_String is
         String_Begin : Integer := F.Current_Line_Nbr;
      begin
         Token.Lexical_Unit := String_Lex;
         Token.Lexicon      := Blank_Identifier;
         Save_Character;

         -- Add chars until """ " or """ or """" found
         loop
            if f.Current_Character = ''' then
               Save_Character;

               if f.Current_Character = ''' then
                  Save_Character;
               else
                  exit;
               end if;
            else
               Save_Character;
            end if;

            if F.Current_Character = ASCII.EOT  then
               Message( F, " error : end of file in string starting at" &
                            Integer'Image(String_Begin) );
               raise Lexical_Error;
            end if;

            if Is_Control( F.Current_Character ) then
               Message( F, " error : control character in string starting at" &
                            Integer'Image(String_Begin) );
               raise Lexical_Error;
            end if;

         end loop;
      end Get_SQL_String;

      -- GET COMMENT --------------------------------------------------
      procedure Get_Comment is
      begin
         Token.Lexical_Unit := Comment_Lex;
         Token.Lexicon      := Blank_Identifier;
         Save_Character;

         if f.Current_Character = '-' then
            Accept_Input(f);
            Flush(f);
            Save_Character;
         else
            Token.Lexical_Unit := Delimiter_Lex;
         end if;
      end Get_Comment;

      -- GET DELIMITER -----------------------------------------------
      procedure Get_Delimiter is
      begin

         Token.Lexical_Unit := Delimiter_Lex;
         Token.Lexicon      := Blank_Identifier;

         -- Check for the single and double operators

         case f.Current_Character is
            when '=' =>
               Save_Character;
               if f.Current_Character = '>' then
                  Save_Character;
               end if;
            when '.' =>
               Save_Character;
               if f.Current_Character = '.' then
                  Save_Character;
               end if;
            when '*' =>
               Save_Character;
               if f.Current_Character = '*' then
                  Save_Character;
               end if;
            when ':' =>
               Save_Character;
               if f.Current_Character = '=' then
                  Save_Character;
               end if;
            when '/' =>
               Save_Character;
               if f.Current_Character = '=' then
                  Save_Character;
               end if;
            when '>' =>
               Save_Character;
               if (f.Current_Character = '=') or (f.Current_Character = '>') then
                  Save_Character;
               end if;
            when '<' =>
               Save_Character;
               if    (f.Current_Character = '=')
                  or (f.Current_Character = '<')
                  or (f.Current_Character = '>') then
                  Save_Character;
               end if;
            when '&' | '(' | ')' | '+' | ',' | '-' | ';' | '|' =>
               Save_Character;
            when ASCII.EOT =>
               Token.Lexical_Unit := End_Of_File_Lex;
            when others =>
               Token.Lexical_Unit := Unknown_Lex;
               Save_Character;
         end case;
      end Get_Delimiter;

   begin -- GET TOKEN
      -- Skip white space
      while (f.Current_Character = Space) or (f.Current_Character = Tab) loop
         Get_Char(f, f.Current_Character);
      end loop;

      f.Begin_Of_Token := f.Read_Pos;

      -- The first character determines token type
      case f.Current_Character is

         when 'a'..'z' | 'A'..'Z' | '0'..'9' =>
            Get_Identifier;

 --        when '0'..'9'            =>
 --           Get_Numeric_Literal;

         when Latin_1.Quotation   =>
            Get_Ada_String;

         when ''' =>                      -- bug fix: 0802.1
            Get_SQL_String;

         when '-'                 =>
            Get_Comment;

         when others              =>
            Get_Delimiter;

      end case;
   end Get_Token;


   -------------
   -- Comment --
   -------------
   procedure Comment(
      f    : in out File_Reader;
      text : in String ) is
      -- Place a comment in the output file
   begin
      Put_Line(
         f.Output,
         "--% " & To_String( F.File_Name ) & " at" & Natural'Image(f.Current_Line_Nbr) &
         " : "  & text
      );
   end Comment;

   --|
   --| Place a comment in the output file
   --|
   procedure Insert(
      f    : in out File_Reader;
      text : in String ) is
   begin
      Put_Line(f.Output, text );
   end Insert;

   --|
   --| Indicate a syntax error
   --|
   procedure Message(
      f    : in File_Reader;
      text : in String ) is
      ---
      Col  : Natural renames f.Begin_of_Token;
      Row  : Natural renames f.Current_Line_Nbr;
      ---
   begin
      Put_Line( Standard_Error,
         To_String(f.File_Name) & ":" &
         Natural'Image(Row)     & ":" &
         Natural'Image(Col)     & ":" &
         text );
   end Message;

   ------------------
   -- Current_Line --
   ------------------
   function Current_Line(
      F : in File_Reader ) return Natural is
      -- Get the current line number
   begin
      return F.Current_Line_Nbr;
   end Current_Line;

   ----------
   -- Open --
   ----------
   function Open(
      Input_File_Name  : in String;
      Output_File_Name : in String ) return File_Reader is
      ---
      Result : File_Reader := new File_Reader_Type;
      ---
   begin
       Open( File => Result.File,
             Mode => In_File,
             Name => Input_File_Name );

       Create( Result.Output,
             Mode => Out_File,
             Name => Output_File_Name );

       Result.File_Name := To_Unbounded_String(Input_File_Name);
       return Result;
   end Open;

   -----------
   -- Close --
   -----------
   procedure Close(
      f : in out File_Reader ) is
   begin
      Close( f.File );
      Close( f.Output );
   end Close;

   ------------
   -- Delete --
   ------------
   procedure Delete(
      f : in out File_Reader ) is
   begin
      Delete( f.Output );
   end Delete;

end Scanner;


