/* MIRANDA LEX ANALYSER */

/**************************************************************************
 * Copyright (C) Research Software Limited 1985-90.  All rights reserved. *
 * The Miranda system is distributed as free software under the terms in  *
 * the file "COPYING" which is included in the distribution.              *
 *                                                                        *
 * Revised to C11 standard and made 64bit compatible, January 2020        *
 *------------------------------------------------------------------------*/

#include "data.h"
#include "lex.h"
#include "big.h"
#include <errno.h>

static int charclass(void);
static void chblank(char *);
static int collectstars(void);
static word directive(void);
static void hexnumeral(void);
static int identifier(int);
static void kollect(int(*f)());
static void numeral(void);
static void octnumeral(void);
static int peekch(void);
static int peekdig(void);
static void string(void);

extern word DICSPACE; /* see steer.c for default value */
/* capacity in chars of dictionary space for storing identifiers and file names
   to get a larger name space just increase this number  */
extern FILE *s_in;
extern word echoing,listing,verbosity,magic,inbnf,inlex;
word fileq=NIL; /* list of currently open-for-input files, of form
	          cons(strcons(stream,<ptr to element of 'files'>),...)*/
word insertdepth= -1,margstack=NIL,col=0,lmargin=0;
word echostack=NIL;
word lverge=0,vergstack=NIL;
char *prefixbase; /* stores prefixes for pathnames, to get static resolution */
word prefixlimit=1024; /* initial size of space for prefixes */
word prefix,prefixstack=NIL; /* current prefix, stack of old prefixes */
word atnl=1,line_no=0;
word lastline;
word litstack=NIL,linostack=NIL;
word c=' ', lastc;
extern word commandmode;
word common_stdin,common_stdinb,cook_stdin;
word litmain=0,literate=0; /* flags "literate" comment convention */
char *dic,*dicp,*dicq;
char *pathname();

void setupdic()
{ dicp=dicq=dic=malloc(DICSPACE);
  if(dic==NULL)mallocfail("dictionary");
  /* it is not permissible to realloc dic, because at the moment identifiers
     etc. contain absolute pointers into the dictionary space - so we must
     choose fairly large initial value for DICSPACE.  Fix this later */
  prefixbase=malloc(prefixlimit);
  prefixbase[0]='\0';
  prefix=0;
}

/* this allows ~login convention in filenames */
/* #define okgetpwnam
/* suppress 26.5.06 getpwnam causes runtime error when statically linked (Linux) */

#ifdef okgetpwnam
#include <pwd.h>
struct passwd *getpwnam();
#endif
char *getenv();

char *gethome(n) /* for expanding leading `~' in tokens and pathnames */
char *n;
{ struct passwd *pw;
  if(n[0]=='\0')return(getenv("HOME"));
#ifdef okgetpwnam
  if(pw=getpwnam(n))return(pw->pw_dir); 
#endif
  return(NULL);
}

#define ovflocheck if(dicq-dic>DICSPACE)dicovflo()

void dicovflo() /* is this called everywhere it should be? Check later */
{ fprintf(stderr,"\npanic: dictionary overflow\n"); exit(1); }

char *token() /* lex analyser for command language (very simple) */
{ extern char *current_script;
  word ch=getchar();
  dicq = dicp;  /* uses top of dictionary as temporary work space */
  while(ch==' '||ch=='\t')ch=getchar();
  if(ch=='~')
    { char *h;
      *dicq++ = ch;
      ch=getchar();
      while(isalnum(ch)||ch=='-'||ch=='_'||ch=='.')
	   *dicq++ = ch,ch=getchar();
	   /* NB csh does not allow `.' in user ids when expanding `~'
	      but this may be a mistake */
      *dicq='\0';
      if(h=gethome(dicp+1))
      (void)strcpy(dicp,h),dicq=dicp+strlen(dicp);
    }
#ifdef SPACEINFILENAMES
  if(ch!='"'&&ch!='<')        /* test added 9.5.06 see else part */
#endif
  while(!isspace(ch)&&ch!=EOF)
       { *dicq++ = ch; 
	 if(ch=='%')
	 if(dicq[-2]=='\\')(--dicq)[-1]='%';
	 else dicq--,(void)strcpy(dicq,current_script),dicq+=strlen(dicq);
	 ch=getchar(); }
#ifdef SPACEINFILENAMES
  else { word closeq= ch=='<'?'>':'"';  /* this branch added 9.5.06          */
         *dicq++ = ch;                 /* to allow spaces in "tok" or <tok> */
         ch=getchar();
         while(ch!=closeq&&ch!='\n'&&ch!=EOF) 
              *dicq++ = ch, ch=getchar(); 
         if(ch==closeq)*dicq++ = ch, ch=getchar(); }
#endif
  *dicq++ = '\0';
  ovflocheck;
  while(ch==' '||ch=='\t')ch=getchar();
  ungetc(ch,stdin);
  return(*dicp=='\0'?(char *)NULL:dicp);
} /* NB - if no token returns NULL rather than pointer to empty string */

char *addextn(b,s) /* if(b)force s to end in ".m", and resolve <quotes> */
word b;
char *s;
{ extern char *miralib;
  extern char linebuf[];
  word n=strlen(s);
  /* printf("addextn(%s)\n",s); /* DEBUG */
  if(s[0]=='<'&&s[n-1]=='>')
    { static int miralen=0; /* code to handle quotes added 21/1/87 */
      if(!miralen)miralen=strlen(miralib);
      strcpy(linebuf,miralib);
      linebuf[miralen]= '/';
      strcpy(linebuf+miralen+1,s+1);
      strcpy(dicp,linebuf);
      s=dicp;
      n=n+miralen-1;
      dicq=dicp+n+1;
      dicq[-1] = '\0'; /* overwrites '>' */
      ovflocheck; } else
  if(s[0]=='\"'&&s[n-1]=='\"')
    { /*strip quotes */ 
      dicq=dicp; s++;
      while(*s)*dicq++ = *s++;
      dicq[-1]='\0'; /* overwrites '"' */
      s=dicp; n=n-2;
    }
  if(!b||strcmp(s+n-2,".m")==0)return(s);
  if(s==dicp)dicq--;/*if s in scratch area at top of dic, extend in situ*/
  else { /* otherwise build new copy at top of dic */
         dicq=dicp;
         while(*s)*dicq++ = *s++;
	 *dicq = '\0'; }
  if(strcmp(dicq-2,".x")==0)dicq -= 2; else
  if(dicq[-1]=='.')dicq -= 1;
  (void)strcpy(dicq,".m");
  dicq += 3;
  ovflocheck;
  /* printf("return(%s)\n",dicp); /* DEBUG */
  return(dicp);
} /* NB - call keep(dicp) if the result is to be retained */

word brct=0;

void spaces(n)
word n;
{ while(n-- >0)putchar(' ');
}

int litname(s)
char *s;
{ word n=strlen(s); 
  return(n>=6 && strcmp(s+n-6,".lit.m")==0);
}

int getch() /* keeps track of current position in the variable "col"(column) */
{ word ch= getc(s_in);
  if(ch==EOF&&!atnl&&tl[fileq]==NIL) /* badly terminated top level file */
    { atnl=1; return('\n'); }
  if(atnl)
    { if((line_no==0&&!commandmode||magic&&line_no==1)&&litstack==NIL)
	litmain=literate= (ch=='>')||litname(get_fil(current_file));
      if(literate)
	{ word i=0;
	  while(ch!=EOF&&ch!='>')
	           { ungetc(ch,s_in);
		     line_no++;
		     (void)fgets(dicp,250,s_in);
		     if(i==0&&line_no>1)chblank(dicp); i++;
		     if(echoing)spaces(lverge),fputs(dicp,stdout);
		     ch=getc(s_in); }
	  if((i>1||line_no==1&&i==1)&&ch!=EOF)chblank(dicp);
	  if(ch=='>')
	    { if(echoing)putchar(ch),spaces(lverge);ch=getc(s_in); }
	} /* supports alternative `literate' comment convention */
      atnl=0; col= lverge+literate; 
      if(!commandmode&&ch!=EOF)line_no++; }
  if(echoing&&ch!=EOF)
    { putchar(ch);
      if(ch=='\n'&&!literate)
	if(litmain)putchar('>'),spaces(lverge);
	else spaces(lverge);
    }
  if(ch=='\t')col= ((col-lverge)/8 + 1)*8+lverge;
  else col++;
  if(ch=='\n')atnl= 1;
  return(ch); }

int blankerr=0;

void chblank(s)
char *s;
{ while(*s==' '||*s=='\t')s++;
  if(*s=='\n')return;
  syntax("formal text not delimited by blank line\n");
  blankerr=1;
  reset(); /* easiest way to recover is to pretend it was an interrupt */
}

/* getlitch gets a character from input like getch, but using C escaping
   conventions if the char is backslash -- for use in reading character
   and string constants */

int rawch;
/* it is often important to know, when certain characters are returned (e.g.
   quotes and newlines) whether they were escaped or literal */

int errch; /* for reporting unrecognised \escape */

int getlitch()
{ extern int UTF8;
  int ch=c;
  rawch = ch;
  if(ch=='\n')return(ch); /* always an error */
  if(UTF8&&ch>127)
    { /* UTF-8 uses 2 or 3 bytes for unicode points to 0xffff */
      word ch1=c=getch();
      if((ch&0xe0)==0xc0)  /* 2 bytes */
        { if((ch1&0xc0)!=0x80)
            return -5; /* not valid UTF8 */
          c=getch();
          return sto_char((ch&0x1f)<<6|ch1&0x3f); }
      word ch2=c=getch();
      if((ch&0xf0)==0xe0) /* 3 bytes */
        { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80)
            return -5; /* not valid UTF8 */
          c=getch();
          return sto_char((ch&0xf)<<12|(ch1&0x3f)<<6|ch2&0x3f); }
      word ch3=c=getch();
      if((ch&0xf8)==0xf0) /* 4 bytes, beyond basic multiligual plane */
        { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80||(ch3&0xc0)!=0x80)
            return -5; /* not valid UTF8 */
          c=getch();
          return((ch&7)<<18|(ch1&0x3f)<<12|(ch2&0x3f)<<6|ch3&0x3f); }
      return(-5);
   /* not UTF8 */
    }
  if(ch!='\\')
    { c=getch(); return(ch); }
  ch = getch();
  c = getch();
  switch(ch)
  { case '\n': return(getlitch()); /* escaped nl was handled in 'getch()' */
    case 'a': return('\a');
    case 'b': return('\b');
    case 'f': return('\f');  /* form feed */
    case 'n': return('\n');  /* newline, == linefeed */
    case 'r': return('\r');  /* carriage return */
    case 't': return('\t');
    case 'v': return('\v');
    case 'X': /* omit for Haskell escape rules, see also lines marked H */
    case 'x': if(isxdigit(c))
              { int value, N=ch=='x'?4:6; /* N=7 for Haskell escape rules */
                char hold[8];
                ch = c;
                int count=0;
             /* while(ch=='0'&&isxdigit(peekch()))ch=getch(); /* H-lose leading 0s */
                while(isxdigit(ch)&&count<N)
                     hold[count++]=ch,ch=getch();
                /* read upto N hex digits */
                hold[count] = '\0';
                sscanf(hold,"%x",&value);
                c = ch;
                return value>UMAX?-3 /* \x out of range */
                       :sto_char(value); }
              else return -2; /* \x with no hex digits */
    default: if('0'<=ch&&ch<='9')
             { word n=ch-'0',count=1,N=3; /* N=8 for Haskell escape rules */
               ch = c;
            /* while(ch=='0'&&isdigit(peekch()))ch=getch(); /* H-lose leading 0s */
               while(isdigit(ch)&&count<N)
               /* read upto N digits */
               { n = 10*n+ch-'0';
                 count++;
                 ch = getch(); }
               c = ch;
               return /* n>UMAX?-4:  /* H \decimal out of range */
                      sto_char(n); }
             if(ch=='\''||ch=='"'||ch=='\\'||ch=='`')return(ch);  /* see note */
             if(ch=='&')return -7; /* Haskell null escape, accept silently */
             errch=ch<=255?ch:'?';
             return -6;  /* unrecognised \something */
  }
}  /* note: we accept \` for ` because getlitch() is used by charclass() */

char *rdline()  /* used by the "!" command -- see steer.c */
{ extern char *current_script;
  static char linebuf[BUFSIZE];
  char *p=linebuf;
  word ch=getchar(),expansion=0;
  while(ch==' '||ch=='\t')ch=getchar();
  if(ch=='\n'||ch=='!'&&!(*linebuf))
    { /* "!!" or "!" on its own means repeat last !command */
      if(*linebuf)printf("!%s",linebuf);
      while(ch!='\n'&&ch!=EOF)ch=getchar();
      return(linebuf); }
  if(ch=='!')
    expansion=1,p=linebuf+strlen(linebuf)-1; /* p now points at old '\n' */
  else ungetc(ch,stdin);
  while((*p++ =ch=getchar())!='\n'&&ch!=EOF)
    if(p-linebuf>=BUFSIZE)
      { *p='\0';
	fprintf(stderr,"sorry, !command too long (limit=%d chars): %s...\n",
		BUFSIZE,linebuf);
	while((ch=getchar())!='\n'&&ch!=EOF);
	return(NULL);
      } else
    if(p[-1]=='%')
    if(p>linebuf+1&&p[-2]=='\\')(--p)[-1]='%'; else
      { (void)strncpy(p-1,current_script,linebuf+BUFSIZE-p);
        p = linebuf+strlen(linebuf);
        expansion = 1;
      }
  *p = '\0';
  if(expansion)printf("!%s",linebuf);
  return(linebuf); }

void setlmargin()   /* this and the next routine are used to enforce the offside
                  rule ("yylex" refuses to read a symbol if col<lmargin) */
{ margstack= cons(lmargin,margstack);
  if(lmargin<col)lmargin= col; } /* inner scope region cannot "protrude" */

void unsetlmargin()
{ if(margstack==NIL)return;  /* in case called after `syntax("..")' */
  lmargin= hd[margstack];
  margstack= tl[margstack]; }

int okulid(int);
int PREL=1;

void errclass(word val, word string) 
/* diagnose error in charclass, string or char const */
{ char *s = string==2?"char class":string?"string":"char const";
  if(val==-2)printf("\\x with no xdigits in %s\n",s); else
  if(val==-3)printf("\\hexadecimal escape out of range in %s\n",s); else
  if(val==-4)printf("\\decimal escape out of range in %s\n",s); else
  if(val==-5)printf("unrecognised character in %s"
                      "(UTF8 error)\n",s); else
  if(val==-6)printf("unrecognised escape \\%c in %s\n",errch,s); else
  if(val==-7)printf("illegal use of \\& in char const\n"); else
  printf("unknown error in %s\n",s);
  acterror(); }

word yylex()         /* called by YACC to get the next symbol */
{ extern word SYNERR,exportfiles,inexplist,sreds;
  /* SYNERR flags context sensitive syntax error detected in actions */
  if(SYNERR)return(END); /* tell YACC to go home */
  layout();
  if(c=='\n') /* can only occur in command mode */
/*  if(magic){ commandmode=0; /* expression just read, now script */
/*	       line_no=2;
/*	       return(c); } else /* no longer relevant 26.11.2019 */
    return(END);
  if(col<lmargin)
  if(c=='='&&(margstack==NIL||col>=hd[margstack]))/* && part fixes utah.bug*/
    { c = getch();
      return(ELSEQ);    /* ELSEQ means "OFFSIDE =" */
    }
  else return(OFFSIDE);
  if(c==';') /* fixes utah2.bug */
    { c=getch(); layout();
      if(c=='='&&(margstack==NIL||col>=hd[margstack]))
        { c = getch();
          return(ELSEQ);    /* ELSEQ means "OFFSIDE =" */
        }
      else return(';');
    }
  if(
  /* c=='_'&&okid(peekch()) || /* _id/_ID as lowercase id */
     isalpha(c)){ kollect(okid);
                   if(inlex==1){ layout();
                                 yylval=name();
                                 return(c=='='?LEXDEF:
                                        isconstructor(yylval)?CNAME:
                                        NAME); }
                   if(inbnf==1)
                     /* add trailing space to nonterminal to avoid clash
                        with ordinary names */
                     dicq[-1] = ' ',
                     *dicq++ = '\0';
                   return(identifier(0)); }
  if('0'<=c&&c<='9'||c=='.'&&peekdig())
  { if(c=='0'&&tolower(peekch())=='x')
      hexnumeral(); else /* added 21.11.2013 */
    if(c=='0'&&tolower(peekch())=='o')
      getch(),c=getch(),octnumeral(); /* added 21.11.2013 */
    else numeral();
    return(CONST); }
  if(c=='%'&&!commandmode)return(directive()); 
  if(c=='\'')
  { c = getch();
    yylval= getlitch();
    if(yylval<0){ errclass(yylval,0); return CONST; }
    if(!is_char(yylval))
      printf("%simpossible event while reading char const ('\\%lu\')\n",
	     echoing?"\n":"",yylval),
      acterror();
    if(rawch=='\n'||c!='\'')syntax("improperly terminated char const\n");
    else c= getch();
    return(CONST); }
  if(inexplist&&(c=='\"'||c=='<'))
    { if(!pathname())syntax("badly formed pathname in %export list\n");
      else exportfiles=strcons(addextn(1,dicp),exportfiles),
	   keep(dicp);
      return(PATHNAME); }
  if(inlex==1&&c=='`')
    { return(charclass()?ANTICHARCLASS:CHARCLASS); }
  if(c=='\"')
  { string();
    if(yylval==NIL)yylval=NILS;  /* to help typechecker! */
    return(CONST); }
  if(inbnf==2) /* fiddle to offside rule in grammars */
    if(c=='[')brct++; else if(c==']')brct--; else
    if(c=='|'&&brct==0)
      return(OFFSIDE);
  if(c==EOF)
  { if(tl[fileq]==NIL&&margstack!=NIL)return(OFFSIDE); /* to fix dtbug */
    fclose((FILE *)hd[hd[fileq]]);
    fileq= tl[fileq]; insertdepth--;
    if(fileq!=NIL&&hd[echostack])
      { if(literate)putchar('>'),spaces(lverge);
        printf("<end of insert>"); }
    s_in= fileq==NIL?stdin:(FILE *)hd[hd[fileq]];
    c= ' ';
    if(fileq==NIL)
      { lverge=c=col=lmargin=0;
        /* c=0; necessary because YACC sometimes reads 1 token past END */
	atnl=1;
	echoing=verbosity&listing;
        lastline=line_no;
	/* hack so errline can be set right if err at end of file */
	line_no=0;
	litmain=literate=0;
        return(END); }
    else { current_file = tl[hd[fileq]];
	   prefix=hd[prefixstack];
	   prefixstack=tl[prefixstack];
	   echoing=hd[echostack];
	   echostack=tl[echostack];
           lverge=hd[vergstack];
           vergstack=tl[vergstack];
           literate=hd[litstack];
           litstack=tl[litstack];
           line_no=hd[linostack];
           linostack=tl[linostack]; }
    return(yylex());  }
  lastc= c;
  c= getch();
#define try(x,y) if(c==x){ c=getch(); return(y); }
  switch(lastc) {
  case '_': if(c=='') /* underlined something */
              { c=getch();
                if(c=='<'){ c=getch(); return(LE); }
                if(c=='>'){ c=getch(); return(GE); }
                if(c=='%'&&!commandmode)return(directive());
                if(isalpha(c)) /* underlined reserved word */
	          { kollect(okulid);
                    if(dicp[1]=='_'&&dicp[2]=='')
	              return(identifier(1)); }
                syntax("illegal use of underlining\n");
                return('_'); }
	    return(lastc);
  case '-': try('>',ARROW) try('-',MINUSMINUS) return(lastc);
  case '<': try('-',LEFTARROW) try('=',LE) return(lastc);
  case '=': if(c=='>'){ syntax("unexpected symbol =>\n"); return '='; }
            try('=',EQEQ) return(lastc);
  case '+': try('+',PLUSPLUS) return(lastc);
  case '.': if(c=='.')
              { c=getch();
	        return(DOTDOT);
	      }
	    return(lastc);
  case '\\': try('/',VEL) return(lastc);
  case '>': try('=',GE) return(lastc);
  case '~': try('=',NE) return(lastc);
  case '&': if(c=='>')
              { c=getch();
		if(c=='>')yylval=1;
		else yylval=0,ungetc(c,s_in);
		c=' ';
		return(TO); }
	    return(lastc);
  case '/': try('/',DIAG) return(lastc);
  case '*': try('*',collectstars()) return(lastc);
  case ':': if(c==':')
	      { c=getch();
                if(c=='='){ c=getch(); return(COLON2EQ); }
	        else return(COLONCOLON);
	      }
	    return(lastc);
  case '$': if(
            /* c=='_'&&okid(peekch())|| /* _id/_ID as id */
              isalpha(c))
              { int t;
                kollect(okid);
                t=identifier(0);
                return(t==NAME?INFIXNAME:t==CNAME?INFIXCNAME:'$'); }
                /* the last alternative is an error - caveat */
	    if('1'<=c&&c<='9')
	      { int n=0;
		while(isdigit(c)&&n<1e6)n=10*n+c-'0',c=getch();
		if(n>sreds)
		  /* sreds==0 everywhere except in semantic redn clause */
		  printf("%ssyntax error: illegal symbol $%d%s\n",
			 echoing?"\n":"",n,n>=1e6?"...":""),
		  acterror();
		else { yylval=mkgvar(n); return(NAME); }
	      }
            if(c=='-')
              { if(!compiling)
	          syntax("unexpected symbol $-\n"); else
		  {c=getch(); yylval=common_stdin; return(CONST); }}
            /* NB we disallow recursive use of $($/+/-) inside $+ data 
               whence addition of `compiling' to premises */
            if(c==':')
              { c=getch();
                if(c!='-')syntax("unexpected symbol $:\n"); else
              { if(!compiling)
                  syntax("unexpected symbol $:-\n"); else
                  {c=getch(); yylval=common_stdinb; return(CONST); }}} /* $:- */
            if(c=='+')
              { /* if(!(commandmode&&compiling||magic))
	          syntax("unexpected symbol $+\n"); else /* disallow in scripts */
		if(!compiling)
	          syntax("unexpected symbol $+\n"); else
		{ c=getch();
		  if(commandmode)
                    yylval=cook_stdin;
		  else yylval=ap(readvals(0,0),OFFSIDE);
		  return(CONST); }}
            if(c=='$')
              { if(!(inlex==2||commandmode&&compiling))
                  syntax("unexpected symbol $$\n"); else
                { c=getch();
                  if(inlex) { yylval=mklexvar(0); return(NAME); }
                  else return(DOLLAR2); }}
            if(c=='#')
              { if(inlex!=2)syntax("unexpected symbol $#\n"); else
                  { c=getch(); yylval=mklexvar(1); return(NAME); }}
	    if(c=='*')
	      { c=getch(); yylval=ap(GETARGS,0); return(CONST); }
            if(c=='0')
              syntax("illegal symbol $0\n");
  default: return(lastc);
}}

void layout()
{L:while(c==' '||c=='\n'&&!commandmode||c=='\t') c= getch(); 
   if(c==EOF&&commandmode){ c='\n'; return; }
   if(c=='|'&&peekch()=='|'        /* ||comments */
      || col==1&&line_no==1        /* added 19.11.2013 */
         &&c=='#'&&peekch()=='!')  /* UNIX magic string */
     { while((c=getch())!='\n'&&c!=EOF);
       if(c==EOF&&!commandmode)return;
       c= '\n';
       goto L; }
}

int collectstars()
{ int n=2;
  while(c=='*')c=getch(),n++;
  yylval= mktvar(n);
  return(TYPEVAR);
}

word gvars=NIL; /* list of grammar variables - no need to reset */

word mkgvar(i)   /* make bound variable (corresponding to $i in bnf rule) */
word i;
{ word *p= &gvars;
  while(--i)
       { if(*p==NIL)*p=cons(sto_id("gvar"),NIL);
	 p= &tl[*p]; }
  if(*p==NIL)*p=cons(sto_id("gvar"),NIL);
  return(hd[*p]);
} /* all these variables have the same name, and are not in hashbucket */

word lexvar=0;

word mklexvar(i) /* similar - corresponds to $$, $# on rhs of %lex rule */
word i;  /* i=0 or 1 */
{ extern word ltchar;
  if(!lexvar)
    lexvar=cons(sto_id("lexvar"),sto_id("lexvar")),
    id_type(hd[lexvar])=ltchar,
    id_type(tl[lexvar])=genlstat_t();
  return(i?tl[lexvar]:hd[lexvar]);
}

int ARGC;
char **ARGV;  /* initialised in main(), see steer.c */

word conv_args() /* used to give access to command line args
               see case GETARGS in reduce.c */
{ word i=ARGC,x=NIL;
  if(i==0)return(NIL); /* possible only if not invoked from a magic script */
    { while(--i)x=cons(str_conv(ARGV[i]),x);
      x=cons(str_conv(ARGV[0]),x); }
  return(x);
}

word str_conv(s) /* convert C string to Miranda form */
char *s;
{ word x=NIL,i=strlen(s);
  while(i--)x=cons(s[i],x);
  return(x);
} /* opposite of getstring() - see reduce.c */

int okpath(ch)
int ch;
{ return(ch!='\"'&&ch!='\n'&&ch!='>'); }

char *pathname() /* returns NULL if not valid pathname (in string quotes) */
{ layout();
  if(c=='<') /* alternative quotes <..> for system libraries */
    { extern char *miralib;
      char *hold=dicp;
      c=getch();
      (void)strcpy(dicp,miralib);
      dicp+=strlen(miralib);
      *dicp++ = '/';
      kollect(okpath);
      dicp=hold;
      if(c!='>')return(NULL);
      c=' ';
      return(dicp); }
  if(c!='\"')return(NULL);
  c=getch();
  if(c=='~')
    { char *h,*hold=dicp;
      extern char linebuf[];
      *dicp++ = c;
      c=getch();
      while(isalnum(c)||c=='-'||c=='_'||c=='.')
           *dicp++ = c, c=getch();
      *dicp='\0';
      if(h=gethome(hold+1))
        (void)strcpy(hold,h),dicp=hold+strlen(hold);
      else (void)strcpy(&linebuf[0],hold),
	   (void)strcpy(hold,prefixbase+prefix),
           dicp=hold+strlen(prefixbase+prefix),
	   (void)strcpy(dicp,&linebuf[0]),
	   dicp+=strlen(dicp);
      kollect(okpath);
      dicp=hold;
    } else
  if(c=='/') /* absolute pathname */
    kollect(okpath);
  else { /* relative pathname */
	 char *hold=dicp;
	 (void)strcpy(dicp,prefixbase+prefix);
         dicp+=strlen(prefixbase+prefix);
         kollect(okpath);
         dicp=hold; }
  if(c!='\"')return(NULL);
  c = ' ';
  return(dicp);
} /* result is volatile - call keep(dicp) to retain */

void adjust_prefix(f) /* called at %insert and at loadfile, to get static pathname
		   resolution */
char *f;
{ /* the directory part of the pathname f becomes the new
     prefix for pathnames, and we stack the current prefix */
  char *g;
  prefixstack=strcons(prefix,prefixstack);
  prefix += strlen(prefixbase+prefix)+1;
  while(prefix+strlen(f)>=prefixlimit) /* check and fix overflow */
    prefixlimit += 1024, prefixbase=realloc(prefixbase,prefixlimit);
  (void)strcpy(prefixbase+prefix,f);
  g=rindex(prefixbase+prefix,'/');
  if(g)g[1]='\0';
  else prefixbase[prefix]='\0';
}

/* NOTES on how static pathname resolution is achieved:
(the specification is that pathnames must always be resolved relative to the
file in which they are encountered)
Definition -- the 'prefix' of a pathname is the initial segment up to but not
including the last occurrence of '/' (null if no '/' present).
Keep the wd constant during compilation.  Have a global char* prefix, initially
null.
1) Whenever you read a relative pathname(), insert 'prefix' on the front of it.
2) On entering a new level of insert, stack old prefix and prefix becomes that
   of new file name.  Done by calling adjust_prefix().
3) On quitting a level of insert, unstack old prefix.
*/

int peekdig()
{ int ch = getc(s_in);
  ungetc(ch,s_in);
  return('0'<=ch&&ch<='9');
}

int peekch()
{ word ch = getc(s_in);
  ungetc(ch,s_in);
  return(ch);
}

int openfile(n) /* returns 0 or 1 as indication of success - puts file on fileq
               if successful */
char *n;
{ FILE *f;
  f= fopen(n,"r");
  if(f==NULL)return(0);
  fileq= cons(strcons(f,NIL),fileq);
  insertdepth++;
  return(1);
}

int identifier(s)  /* recognises reserved words */
int s; /* flags looking for ul reserved words only */
{ extern word lastid,initialising;
  if(inbnf==1) 
    { /* only reserved nonterminals are `empty', `end', `error', `where' */
      if(is("empty ")||is("e_m_p_t_y"))return(EMPTYSY); else
      if(is("end ")||is("e_n_d"))return(ENDSY); else
      if(is("error ")||is("e_r_r_o_r"))return(ERRORSY); else
      if(is("where ")||is("w_h_e_r_e"))return(WHERE); }
  else
  switch(dicp[0])
  { case 'a': if(is("abstype")||is("a_b_s_t_y_p_e"))
		return(ABSTYPE);
              break;
    case 'd': if(is("div")||is("d_i_v"))
		return(DIV);
              break;
    case 'F': if(is("False")) /* True, False alleged to be predefined, not
				  reserved (??) */
                { yylval = False;
                  return(CONST); }
              break;
    case 'i': if(is("if")||is("i_f"))
		return(IF);
              break;
    case 'm': if(is("mod")||is("m_o_d"))
		return(REM);
              break;
    case 'o': if(is("otherwise")||is("o_t_h_e_r_w_i_s_e"))
		return(OTHERWISE);
	      break;
    case 'r': if(is("readvals")||is("r_e_a_d_v_a_l_s"))
		return(READVALSY);
	      break;
    case 's': if(is("show")||is("s_h_o_w"))
		return(SHOWSYM);
	      break;
    case 'T': if(is("True"))
                { yylval = True;
                  return(CONST); }
    case 't': if(is("type")||is("t_y_p_e"))
		return(TYPE);
              break;
    case 'w': if(is("where")||is("w_h_e_r_e"))
		return(WHERE);
              if(is("with")||is("w_i_t_h"))
		return(WITH);
              break;
  }
  if(s){ syntax("illegal use of underlining\n"); return('_'); }
  yylval=name(); /* not a reserved word */
  if(commandmode&&lastid==0&&id_type(yylval)!=undef_t)lastid=yylval;
  return(isconstructor(yylval)?CNAME:NAME);
}

word directive() /* these are of the form "%identifier" */
{ extern word SYNERR,magic;
  word holdcol=col-1,holdlin=line_no;
  c = getch();
  if(c=='%'){ c=getch(); return(ENDIR); }
  kollect(okulid);
  switch(dicp[0]=='_'&&dicp[1]==''?dicp[2]:dicp[0])
  { case 'b': if(is("begin")||is("_^Hb_^He_^Hg_^Hi_^Hn"))
              if(inlex)
                return(LBEGIN);
              if(is("bnf")||is("_^Hb_^Hn_^Hf"))
                { setlmargin(); col=holdcol+4;
                   /* `indent' to right hand end of directive */
                  return(BNF); }
              break;
    case 'e': if(is("export")||is("_e_x_p_o_r_t"))
		{ if(magic)syntax(
		    "%export directive not permitted in \"-exp\" script\n");
		  return(EXPORT); }
	      break;
    case 'f': if(is("free")||is("_f_r_e_e"))
		{ if(magic)syntax(
		    "%free directive not permitted in \"-exp\" script\n");
		  return(FREE); }
	      break;
    case 'i': if(is("include")||is("_i_n_c_l_u_d_e"))
	      { if(!SYNERR){ layout(); setlmargin(); }
	                 /* does `indent' for grammar */
		if(!pathname())
		  syntax("bad pathname after %include\n");
		else yylval=strcons(addextn(1,dicp),
				    fileinfo(get_fil(current_file),holdlin)),
				    /* (includee,hereinfo) */
			    keep(dicp);
		return(INCLUDE); }
              if(is("insert")||is("_i_n_s_e_r_t"))
              { char *f=pathname();
                if(!f)syntax("bad pathname after %insert\n"); else
                if(insertdepth<12&&openfile(f))
                  { adjust_prefix(f); 
		    vergstack=cons(lverge,vergstack);
		    echostack=cons(echoing,echostack);
		    litstack=cons(literate,litstack);
		    linostack=strcons(line_no,linostack);
		    line_no=0; atnl=1; /* was line_no=1; */
                    keep(dicp);
		    current_file = make_fil(f,fm_time(f),0,NIL);
		    files = append1(files,cons(current_file,NIL));
		    tl[hd[fileq]] = current_file;
                    s_in = (FILE *)hd[hd[fileq]];
		    literate= peekch()=='>'||litname(f);
		    col=lverge=holdcol;
                    if(echoing)
		      { putchar('\n'); 
			if(!literate)
		          if(litmain)putchar('>'),spaces(holdcol);
			  else spaces(holdcol); }
                    c = getch(); } /* used to precede previous cmd when echo
				      was delayed by one char, see getch() */
                else { int toomany=(insertdepth>=12);
		       printf("%s%%insert error - cannot open \"%s\"\n",
			      echoing?"\n":"",f);
		       keep(dicp);
		       if(toomany)printf(
			 "too many nested %%insert directives (limit=%ld)\n",
			 insertdepth);
		       else
		         files = append1(files,cons(make_fil(f,0,0,NIL),NIL));
		         /* line above for benefit of `oldfiles' */
            	       acterror(); }
		return(yylex()); }
	      break;
    case 'l': if(is("lex")||is("_^Hl_^He_^Hx"))
                { if(inlex)syntax("nested %lex not permitted\n");
                  /* due to use of global vars inlex, lexdefs */
                  return(LEX); }
              if(is("list")||is("_l_i_s_t"))
		{ echoing=verbosity; return(yylex()); }
	      break;  
    case 'n': if(is("nolist")||is("_n_o_l_i_s_t"))
		{ echoing=0; return(yylex()); }
	      break;
  }
  if(echoing)putchar('\n');
  printf("syntax error: unknown directive \"%%%s\"\n",dicp),
          acterror();
  return(END);
}

int okid(ch)
int ch;
{ return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9'
          ||ch=='_'||ch=='\''); }

int okulid(ch)
int ch;
{ return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9'
          ||ch=='_'||ch==''||ch=='\''); }

void kollect(f)
/* note top of dictionary used as work space to collect current token */
int (*f)();
{ dicq= dicp;
  while((*f)(c)){ *dicq++ = c; c= getch(); }
  *dicq++ = '\0'; 
  ovflocheck;
}

char *keep(p)  /* call this to retain volatile string for later use */
char *p;
{ if(p==dicp)dicp= dicq;
  else (void)strcpy(dicp,p),
       p=dicp,
       dicp=dicq=dicp+strlen(dicp)+1,
       dic_check();
  return(p);
}  

void dic_check()  /* called from REDUCE */
{ ovflocheck; }

void numeral()
{ word nflag=1;
  dicq= dicp;
  while(isdigit(c))
       *dicq++ = c, c=getch();
  if(c=='.'&&peekdig())
    { *dicq++ = c, c=getch(); nflag=0;
      while(isdigit(c))
           *dicq++ = c, c=getch(); }
  if(c=='e')
    { word np=0;
      *dicq++ = c, c=getch(); nflag=0;
      if(c=='+')c=getch(); else  /* ignore + before exponent */
      if(c=='-')*dicq++ = c, c=getch();
      if(!isdigit(c))  /* e must be followed by some digits */
	syntax("badly formed floating point number\n");
      while(c=='0')
           *dicq++ = c, c=getch();
      while(isdigit(c))
           np++, *dicq++ = c, c=getch();
      if(!nflag&&np>3) /* scanf falls over with silly exponents */
	{ syntax("floating point number out of range\n");
	  return; }
    }
  ovflocheck;
  if(nflag) /* `.' or `e' makes fractional */
    *dicq = '\0',
    yylval= bigscan(dicp); else
  { double r=0.0;
    if(dicq-dicp>60)  /* this allows 59 chars */
      /* scanf crashes, on VAX, gives wrong answers, on ORION 1/05 */
      { syntax("illegal floating point constant (too many digits)\n");
        return; }
    *dicq = '\n'; 
    sscanf(dicp,"%lf",&r);
    yylval= sto_dbl(r); }
}

void hexnumeral()   /* added 21.11.2013 */
{ word nflag=1;
  dicq= dicp;
  *dicq++ = c, c=getch(); /* 0 */
  *dicq++ = c, c=getch(); /* x */
  if(!isxdigit(c)&&c!='.')syntax("malformed hex number\n");
  while(c=='0'&&isxdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */
  while(isxdigit(c))
       *dicq++ = c, c=getch();
  ovflocheck;
  if(c=='.'||tolower(c)=='p') /* hex float, added 20.11.19 */
    { double d;
      if(c=='.')
        { *dicq++ = c, c=getch();
          while(isxdigit(c))
          *dicq++ = c, c=getch(); }
      if(c=='p')
        { *dicq++ = c, c=getch();
          if(c=='+'||c=='-')*dicq++ = c, c=getch();
          if(!isdigit(c))syntax("malformed hex float\n");
          while(isdigit(c))
          *dicq++ = c, c=getch(); }
      ovflocheck;
      *dicq='\0';
      if(dicq-dicp>60||sscanf(dicp,"%lf",&d)!=1)
        syntax("malformed hex float\n");
      else yylval= sto_dbl(d);
      return; }
  *dicq = '\0';
  yylval= bigxscan(dicp+2,dicq);
}

void octnumeral()   /* added 21.11.2013 */
{ word nflag=1;
  dicq= dicp;
  if(!isdigit(c))syntax("malformed octal number\n");
  while(c=='0'&&isdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */
  while(isdigit(c)&&c<='7')
       *dicq++ = c, c=getch();
  if(isdigit(c))syntax("illegal digit in octal number\n");
  ovflocheck;
  *dicq = '\0';
  yylval= bigoscan(dicp,dicq);
}

word namebucket[128]; /* each namebucket has a list terminated by 0, not NIL */

int hash(s) /* returns a value in {0..127} */
char *s;
{ int h = *s;
  if(h)while(*++s)h ^= *s; /* guard necessary to deal with s empty */
  return(h&127);
}

int isconstrname(s)
char *s;
{ if(s[0]=='$')s++;
  return isupper(*s); /* formerly !islower */
}

word getfname(x)
/* nonterminals have an added ' ', getfname returns the corresponding
   function name */
word x;
{ char *p = get_id(x);
  dicq= dicp;
  while(*dicq++ = *p++);
  if(dicq-dicp<3)fprintf(stderr,"impossible event in getfname\n"),exit(1);
  dicq[-2] = '\0'; /* overwrite last char */
  ovflocheck;
  return(name());
}

int isnonterminal(x)
word x;
{ char *n;
  if(tag[x]!=ID)return(0);
  n = get_id(x);
  return(n[strlen(n)-1]==' ');
}

word name()
{ word q,h;
  q= namebucket[h=hash(dicp)];
  while(q&&!is(get_id(hd[q])))q= tl[q];
  if(q==0)
  { q = sto_id(dicp);
    namebucket[h] = cons(q,namebucket[h]);
    keep(dicp); }
  else q= hd[q]; 
  return(q); }
/* note - keeping buckets sorted didn't seem to help (if anything slightly
   slower) probably because ordering only relevant if name not present, and
   outweighed by increased complexity of loop */

int inprelude=1;

word make_id(n)  /* used in mira_setup(), primdef(), predef(), all in steer.c */
char *n;
{ word x,h;
  h=hash(n);
  x = sto_id(inprelude?keep(n):n);
  namebucket[h] = cons(x,namebucket[h]);
  return(x); }

word findid(n)  /* like name() but returns NIL rather than create new id */
char *n;
{ word q;
  q= namebucket[hash(n)];
  while(q&&!strcmp(n,get_id(hd[q]))==0)q= tl[q];
  return(q?hd[q]:NIL); }

word *pnvec=0,nextpn,pn_lim=200;  /* private name vector */

void reset_pns()  /* (re)initialise private name space */
{ nextpn=0;
  if(!pnvec)
    { pnvec=(word *)malloc(pn_lim*sizeof(word));
      if(pnvec==NULL)mallocfail("pnvec"); }
}

word make_pn(val) /* create new private name with value val */
word val;
{ if(nextpn==pn_lim)
    { pn_lim+=400;
      pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word));
      if(pnvec==NULL)mallocfail("pnvec"); }
  pnvec[nextpn]=strcons(nextpn,val);
  return(pnvec[nextpn++]);
}

word sto_pn(n) /* return n'th private name, extending pnvec if necessary */
word n;
{ if(n>=pn_lim)
    { while(pn_lim<=n)pn_lim+=400;
      pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word));
      if(pnvec==NULL)mallocfail("pnvec"); }
  while(nextpn<=n) /* NB allocates all missing names upto and including nth*/
       pnvec[nextpn]=strcons(nextpn,UNDEF),nextpn++;
  return(pnvec[n]);
}

void mkprivate(x) /* disguise identifiers prior to removal from environment */
word x;       /* used in setting up prelude - see main() in steer.c */
{ while(x!=NIL)
  { char *s = get_id(hd[x]);
    get_id(hd[x])[0] += 128;  /* hack to make private internal name */
    x = tl[x]; }                /* NB - doesn't change hashbucket */
  inprelude=0;
}

word sl=100;

void string()
{ word p;
  word ch,badch=0;
  c = getch();
  ch= getlitch();
  p= yylval= cons(NIL,NIL);
  while(ch!=EOF&&rawch!='\"'&&rawch!='\n')
       if(ch==-7) ch=getlitch(); else /* skip \& */
       if(ch<0){ badch=ch; break; }
       else { p= tl[p]= cons(ch,NIL);
              ch= getlitch(); }
  yylval= tl[yylval];
  if(badch)errclass(badch,1);
  if(rawch=='\n')
    syntax("non-escaped newline encountered inside string quotes\n"); else
  if(ch==EOF)
  { if(echoing)putchar('\n');
    printf("syntax error: script ends inside unclosed string quotes - \n");
    printf("    \"");
    while(yylval!=NIL&& sl-- )
    { putchar(hd[yylval]);
      yylval= tl[yylval]; }
    printf("...\"\n");
    acterror(); }
}

int charclass()
{ word p;
  word ch,badch=0,anti=0;
  c = getch();
  if(c=='^')anti=1,c=getch();
  ch= getlitch();
  p= yylval= cons(NIL,NIL);
  while(ch!=EOF&&rawch!='`'&&rawch!='\n')
       if(ch==-7)ch=getlitch(); else /* skip \& */
       if(ch<0){ badch=ch; break; }
       else { if(rawch=='-'&&hd[p]!=NIL&&hd[p]!=DOTDOT)
              ch=DOTDOT; /* non-initial, non-escaped '-' */
              p= tl[p]= cons(ch,NIL);
              ch= getlitch(); }
  if(hd[p]==DOTDOT)hd[p]='-'; /* naturalise a trailing '-' */
  for(p=yylval;tl[p]!=NIL;p=tl[p]) /* move each DOTDOT to front of range */
     if(hd[tl[p]]==DOTDOT)
       { hd[tl[p]]=hd[p],hd[p]=DOTDOT;
         if(hd[tl[p]]>=hd[tl[tl[p]]])
           syntax("illegal use of '-' in [charclass]\n");
       }
  yylval= tl[yylval];
  if(badch)errclass(badch,2);
  if(rawch=='\n')
    syntax("non-escaped newline encountered in char class\n"); else
  if(ch==EOF)
  { if(echoing)putchar('\n');
    printf(
    "syntax error: script ends inside unclosed char class brackets - \n");
    printf("    [");
    while(yylval!=NIL&& sl-- )
    { putchar(hd[yylval]);
      yylval= tl[yylval]; }
    printf("...]\n");
    acterror(); }
  return(anti);
}

void reset_lex()  /* called after an error */
{ extern word errs,errline;
  extern char *current_script;
  /*printf("reset_lex()\n"); /* DEBUG */
  if(!commandmode)
    { if(!errs)errs=fileinfo(get_fil(current_file),line_no);
      /* convention, if errs set contains location of error, otherwise pick up
         from current_file and line_no */
      if(tl[errs]==0&&(char *)hd[errs]==current_script)
	 /* at end of file, so line_no has been reset to 0 */
         printf("error occurs at end of ");
      else printf("error found near line %ld of ",tl[errs]);
      printf("%sfile \"%s\"\ncompilation abandoned\n",
	       (char *)hd[errs]==current_script?"":"%insert ",
	       (char *)hd[errs]);
      if((char *)hd[errs]==current_script)
        errline=tl[errs]==0?lastline:tl[errs],errs=0;
      else { while(tl[linostack]!=NIL)linostack=tl[linostack];
	     errline=hd[linostack]; }
      /* tells editor where to find error - errline contains location of 1st
         error in main script, errs is hereinfo of upto one error in %insert
         script (each is 0 if not set) - some errors can set both */
    }
  reset_state();
}

void reset_state()  /* reset all global variables used by compiler */
{ extern word TABSTRS,SGC,newtyps,algshfns,showchain,inexplist,sreds,
	     rv_script,idsused;
  /* printf("reset_state()\n"); /* DEBUG */
  if(commandmode)
    while(c!='\n'&&c!=EOF)c=getc(s_in);  /* no echo */
  while(fileq!=NIL)fclose((FILE *)hd[hd[fileq]]),fileq=tl[fileq];
  insertdepth= -1;
  s_in=stdin;
  echostack=idsused=prefixstack=litstack=linostack=vergstack
	=margstack=NIL;
  prefix=0; prefixbase[0]='\0';
  echoing=verbosity&listing;
  brct=inbnf=sreds=inlex=inexplist=commandmode=lverge=col=lmargin=0;
  atnl=1;
  rv_script=0;
  algshfns=newtyps=showchain=SGC=TABSTRS=NIL;
  c=' ';
  line_no=0;
  litmain=literate=0;
  /* printf("exit reset_state()\n"); /* DEBUG */
}

/* end of MIRANDA LEX ANALYSER */