summaryrefslogtreecommitdiff
path: root/new/lex.c
diff options
context:
space:
mode:
Diffstat (limited to 'new/lex.c')
-rw-r--r--new/lex.c1213
1 files changed, 0 insertions, 1213 deletions
diff --git a/new/lex.c b/new/lex.c
deleted file mode 100644
index a4e9d09..0000000
--- a/new/lex.c
+++ /dev/null
@@ -1,1213 +0,0 @@
-/* MIRANDA LEX ANALYSER */
-
-/**************************************************************************
- * Copyright (C) Research Software Limited 1985-90. All rights reserved. *
- * The Miranda system is distributed as free software under the terms in *
- * the file "COPYING" which is included in the distribution. *
- *------------------------------------------------------------------------*/
-
-#include "data.h"
-#include "lex.h"
-#include <errno.h>
-
-extern word DICSPACE; /* see steer.c for default value */
-/* capacity in chars of dictionary space for storing identifiers and file names
- to get a larger name space just increase this number */
-extern FILE *s_in;
-extern word echoing,listing,verbosity,magic,inbnf,inlex;
-word fileq=NIL; /* list of currently open-for-input files, of form
- cons(strcons(stream,<ptr to element of 'files'>),...)*/
-word insertdepth= -1,margstack=NIL,col=0,lmargin=0;
-word echostack=NIL;
-word lverge=0,vergstack=NIL;
-char *prefixbase; /* stores prefixes for pathnames, to get static resolution */
-word prefixlimit=1024; /* initial size of space for prefixes */
-word prefix,prefixstack=NIL; /* current prefix, stack of old prefixes */
-word atnl=1,line_no=0;
-word lastline;
-word litstack=NIL,linostack=NIL;
-word c=' ', lastc;
-word commandmode;
-word common_stdin,common_stdinb,cook_stdin;
-word litmain=0,literate=0; /* flags "literate" comment convention */
-char *dic,*dicp,*dicq;
-char *pathname();
-
-setupdic()
-{ dicp=dicq=dic=malloc(DICSPACE);
- if(dic==NULL)mallocfail("dictionary");
- /* it is not permissible to realloc dic, because at the moment identifiers
- etc. contain absolute pointers into the dictionary space - so we must
- choose fairly large initial value for DICSPACE. Fix this later */
- prefixbase=malloc(prefixlimit);
- prefixbase[0]='\0';
- prefix=0;
-}
-
-/* this allows ~login convention in filenames */
-/* #define okgetpwnam
-/* suppress 26.5.06 getpwnam cases runtime error when statically linked (Linux) */
-
-#ifdef okgetpwnam
-#include <pwd.h>
-struct passwd *getpwnam();
-#endif
-char *getenv();
-
-char *gethome(n) /* for expanding leading `~' in tokens and pathnames */
-char *n;
-{ struct passwd *pw;
- if(n[0]=='\0')return(getenv("HOME"));
-#ifdef okgetpwnam
- if(pw=getpwnam(n))return(pw->pw_dir);
-#endif
- return(NULL);
-}
-
-#define ovflocheck if(dicq-dic>DICSPACE)dicovflo()
-
-dicovflo() /* is this called everywhere it should be? Check later */
-{ fprintf(stderr,"\npanic: dictionary overflow\n"); exit(1); }
-
-char *token() /* lex analyser for command language (very simple) */
-{ extern char *current_script;
- word ch=getchar();
- dicq = dicp; /* uses top of dictionary as temporary work space */
- while(ch==' '||ch=='\t')ch=getchar();
- if(ch=='~')
- { char *h;
- *dicq++ = ch;
- ch=getchar();
- while(isalnum(ch)||ch=='-'||ch=='_'||ch=='.')
- *dicq++ = ch,ch=getchar();
- /* NB csh does not allow `.' in user ids when expanding `~'
- but this may be a mistake */
- *dicq='\0';
- if(h=gethome(dicp+1))
- (void)strcpy(dicp,h),dicq=dicp+strlen(dicp);
- }
-#ifdef SPACEINFILENAMES
- if(ch!='"'&&ch!='<') /* test added 9.5.06 see else part */
-#endif
- while(!isspace(ch)&&ch!=EOF)
- { *dicq++ = ch;
- if(ch=='%')
- if(dicq[-2]=='\\')(--dicq)[-1]='%';
- else dicq--,(void)strcpy(dicq,current_script),dicq+=strlen(dicq);
- ch=getchar(); }
-#ifdef SPACEINFILENAMES
- else { word closeq= ch=='<'?'>':'"'; /* this branch added 9.5.06 */
- *dicq++ = ch; /* to allow spaces in "tok" or <tok> */
- ch=getchar();
- while(ch!=closeq&&ch!='\n'&&ch!=EOF)
- *dicq++ = ch, ch=getchar();
- if(ch==closeq)*dicq++ = ch, ch=getchar(); }
-#endif
- *dicq++ = '\0';
- ovflocheck;
- while(ch==' '||ch=='\t')ch=getchar();
- ungetc(ch,stdin);
- return(*dicp=='\0'?(char *)NULL:dicp);
-} /* NB - if no token returns NULL rather than pointer to empty string */
-
-char *addextn(b,s) /* if(b)force s to end in ".m", and resolve <quotes> */
-word b;
-char *s;
-{ extern char *miralib;
- extern char linebuf[];
- word n=strlen(s);
- /* printf("addextn(%s)\n",s); /* DEBUG */
- if(s[0]=='<'&&s[n-1]=='>')
- { static miralen=0; /* code to handle quotes added 21/1/87 */
- if(!miralen)miralen=strlen(miralib);
- strcpy(linebuf,miralib);
- linebuf[miralen]= '/';
- strcpy(linebuf+miralen+1,s+1);
- strcpy(dicp,linebuf);
- s=dicp;
- n=n+miralen-1;
- dicq=dicp+n+1;
- dicq[-1] = '\0'; /* overwrites '>' */
- ovflocheck; } else
- if(s[0]=='\"'&&s[n-1]=='\"')
- { /*strip quotes */
- dicq=dicp; s++;
- while(*s)*dicq++ = *s++;
- dicq[-1]='\0'; /* overwrites '"' */
- s=dicp; n=n-2;
- }
- if(!b||strcmp(s+n-2,".m")==0)return(s);
- if(s==dicp)dicq--;/*if s in scratch area at top of dic, extend in situ*/
- else { /* otherwise build new copy at top of dic */
- dicq=dicp;
- while(*s)*dicq++ = *s++;
- *dicq = '\0'; }
- if(strcmp(dicq-2,".x")==0)dicq -= 2; else
- if(dicq[-1]=='.')dicq -= 1;
- (void)strcpy(dicq,".m");
- dicq += 3;
- ovflocheck;
- /* printf("return(%s)\n",dicp); /* DEBUG */
- return(dicp);
-} /* NB - call keep(dicp) if the result is to be retained */
-
-word brct=0;
-
-spaces(n)
-word n;
-{ while(n-- >0)putchar(' ');
-}
-
-litname(s)
-char *s;
-{ word n=strlen(s);
- return(n>=6 && strcmp(s+n-6,".lit.m")==0);
-}
-
-word getch() /* keeps track of current position in the variable "col"(column) */
-{ word ch= getc(s_in);
- if(ch==EOF&&!atnl&&tl[fileq]==NIL) /* badly terminated top level file */
- { atnl=1; return('\n'); }
- if(atnl)
- { if((line_no==0&&!commandmode||magic&&line_no==1)&&litstack==NIL)
- litmain=literate= (ch=='>')||litname(get_fil(current_file));
- if(literate)
- { word i=0;
- while(ch!=EOF&&ch!='>')
- { ungetc(ch,s_in);
- line_no++;
- (void)fgets(dicp,250,s_in);
- if(i==0&&line_no>1)chblank(dicp); i++;
- if(echoing)spaces(lverge),fputs(dicp,stdout);
- ch=getc(s_in); }
- if((i>1||line_no==1&&i==1)&&ch!=EOF)chblank(dicp);
- if(ch=='>')
- { if(echoing)putchar(ch),spaces(lverge);ch=getc(s_in); }
- } /* supports alternative `literate' comment convention */
- atnl=0; col= lverge+literate;
- if(!commandmode&&ch!=EOF)line_no++; }
- if(echoing&&ch!=EOF)
- { putchar(ch);
- if(ch=='\n'&&!literate)
- if(litmain)putchar('>'),spaces(lverge);
- else spaces(lverge);
- }
- if(ch=='\t')col= ((col-lverge)/8 + 1)*8+lverge;
- else col++;
- if(ch=='\n')atnl= 1;
- return(ch); }
-
-word blankerr=0;
-
-chblank(s)
-char *s;
-{ while(*s==' '||*s=='\t')s++;
- if(*s=='\n')return;
- syntax("formal text not delimited by blank line\n");
- blankerr=1;
- reset(); /* easiest way to recover is to pretend it was an interrupt */
-}
-
-/* getlitch gets a character from input like getch, but using C escaping
- conventions if the char is backslash -- for use in reading character
- and string constants */
-
-word rawch;
-/* it is often important to know, when certain characters are returned (e.g.
- quotes and newlines) whether they were escaped or literal */
-
-word errch; /* for reporting unrecognised \escape */
-
-word getlitch()
-{ extern word UTF8;
- word ch=c;
- rawch = ch;
- if(ch=='\n')return(ch); /* always an error */
- if(UTF8&&ch>127)
- { /* UTF-8 uses 2 or 3 bytes for unicode points to 0xffff */
- word ch1=c=getch();
- if((ch&0xe0)==0xc0) /* 2 bytes */
- { if((ch1&0xc0)!=0x80)
- return -5; /* not valid UTF8 */
- c=getch();
- return sto_char((ch&0x1f)<<6|ch1&0x3f); }
- word ch2=c=getch();
- if((ch&0xf0)==0xe0) /* 3 bytes */
- { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80)
- return -5; /* not valid UTF8 */
- c=getch();
- return sto_char((ch&0xf)<<12|(ch1&0x3f)<<6|ch2&0x3f); }
- word ch3=c=getch();
- if((ch&0xf8)==0xf0) /* 4 bytes, beyond basic multiligual plane */
- { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80||(ch3&0xc0)!=0x80)
- return -5; /* not valid UTF8 */
- c=getch();
- return((ch&7)<<18|(ch1&0x3f)<<12|(ch2&0x3f)<<6|ch3&0x3f); }
- return(-5);
- /* not UTF8 */
- }
- if(ch!='\\')
- { c=getch(); return(ch); }
- ch = getch();
- c = getch();
- switch(ch)
- { case '\n': return(getlitch()); /* escaped nl was handled in 'getch()' */
- case 'a': return('\a');
- case 'b': return('\b');
- case 'f': return('\f'); /* form feed */
- case 'n': return('\n'); /* newline, == linefeed */
- case 'r': return('\r'); /* carriage return */
- case 't': return('\t');
- case 'v': return('\v');
- case 'X': /* omit for Haskell escape rules, see also lines marked H */
- case 'x': if(isxdigit(c))
- { word value, N=ch=='x'?4:6; /* N=7 for Haskell escape rules */
- char hold[8];
- ch = c;
- word count=0;
- /* while(ch=='0'&&isxdigit(peekch()))ch=getch(); /* H-lose leading 0s */
- while(isxdigit(ch)&&count<N)
- hold[count++]=ch,ch=getch();
- /* read upto N hex digits */
- hold[count] = '\0';
- sscanf(hold,"%x",&value);
- c = ch;
- return value>UMAX?-3 /* \x out of range */
- :sto_char(value); }
- else return -2; /* \x with no hex digits */
- default: if('0'<=ch&&ch<='9')
- { word n=ch-'0',count=1,N=3; /* N=8 for Haskell escape rules */
- ch = c;
- /* while(ch=='0'&&isdigit(peekch()))ch=getch(); /* H-lose leading 0s */
- while(isdigit(ch)&&count<N)
- /* read upto N digits */
- { n = 10*n+ch-'0';
- count++;
- ch = getch(); }
- c = ch;
- return /* n>UMAX?-4: /* H \decimal out of range */
- sto_char(n); }
- if(ch=='\''||ch=='"'||ch=='\\'||ch=='`')return(ch); /* see note */
- if(ch=='&')return -7; /* Haskell null escape, accept silently */
- errch=ch<=255?ch:'?';
- return -6; /* unrecognised \something */
- }
-} /* note: we accept \` for ` because getlitch() is used by charclass() */
-
-char *rdline() /* used by the "!" command -- see RULES */
-{ extern char *current_script;
- static char linebuf[BUFSIZE];
- char *p=linebuf;
- word ch=getchar(),expansion=0;
- while(ch==' '||ch=='\t')ch=getchar();
- if(ch=='\n'||ch=='!'&&!(*linebuf))
- { /* "!!" or "!" on its own means repeat last !command */
- if(*linebuf)printf("!%s",linebuf);
- while(ch!='\n'&&ch!=EOF)ch=getchar();
- return(linebuf); }
- if(ch=='!')
- expansion=1,p=linebuf+strlen(linebuf)-1; /* p now points at old '\n' */
- else ungetc(ch,stdin);
- while((*p++ =ch=getchar())!='\n'&&ch!=EOF)
- if(p-linebuf>=BUFSIZE)
- { *p='\0';
- fprintf(stderr,"sorry, !command too long (limit=%d chars): %s...\n",
- BUFSIZE,linebuf);
- while((ch=getchar())!='\n'&&ch!=EOF);
- return(NULL);
- } else
- if(p[-1]=='%')
- if(p>linebuf+1&&p[-2]=='\\')(--p)[-1]='%'; else
- { (void)strncpy(p-1,current_script,linebuf+BUFSIZE-p);
- p = linebuf+strlen(linebuf);
- expansion = 1;
- }
- *p = '\0';
- if(expansion)printf("!%s",linebuf);
- return(linebuf); }
-
-setlmargin() /* this and the next routine are used to enforce the offside
- rule ("yylex" refuses to read a symbol if col<lmargin) */
-{ margstack= cons(lmargin,margstack);
- if(lmargin<col)lmargin= col; } /* inner scope region cannot "protrude" */
-
-unsetlmargin()
-{ if(margstack==NIL)return; /* in case called after `syntax("..")' */
- lmargin= hd[margstack];
- margstack= tl[margstack]; }
-
-word okid();
-word okulid();
-word PREL=1;
-
-#define isletter(c) ('a'<=c&&c<='z'||'A'<=c&&c<='Z')
-
-errclass(word val, word string)
-/* diagnose error in charclass, string or char const */
-{ char *s = string==2?"char class":string?"string":"char const";
- if(val==-2)printf("\\x with no xdigits in %s\n",s); else
- if(val==-3)printf("\\hexadecimal escape out of range in %s\n",s); else
- if(val==-4)printf("\\decimal escape out of range in %s\n",s); else
- if(val==-5)printf("unrecognised character in %s"
- "(UTF8 error)\n",s); else
- if(val==-6)printf("unrecognised escape \\%c in %s\n",errch,s); else
- if(val==-7)printf("illegal use of \\& in char const\n"); else
- printf("unknown error in %s\n",s);
- acterror(); }
-
-yylex() /* called by YACC to get the next symbol */
-{ extern word SYNERR,exportfiles,inexplist,sreds;
- /* SYNERR flags context sensitive syntax error detected in actions */
- if(SYNERR)return(END); /* tell YACC to go home */
- layout();
- if(c=='\n') /* can only occur in command mode */
-/* if(magic){ commandmode=0; /* expression just read, now script */
-/* line_no=2;
-/* return(c); } else /* no longer relevant 26.11.2019 */
- return(END);
- if(col<lmargin)
- if(c=='='&&(margstack==NIL||col>=hd[margstack]))/* && part fixes utah.bug*/
- { c = getch();
- return(ELSEQ); /* ELSEQ means "OFFSIDE =" */
- }
- else return(OFFSIDE);
- if(c==';') /* fixes utah2.bug */
- { c=getch(); layout();
- if(c=='='&&(margstack==NIL||col>=hd[margstack]))
- { c = getch();
- return(ELSEQ); /* ELSEQ means "OFFSIDE =" */
- }
- else return(';');
- }
- if(
- /* c=='_'&&okid(peekch()) || /* _id/_ID as lowercase id */
- isletter(c)){ kollect(okid);
- if(inlex==1){ layout();
- yylval=name();
- return(c=='='?LEXDEF:
- isconstructor(yylval)?CNAME:
- NAME); }
- if(inbnf==1)
- /* add trailing space to nonterminal to avoid clash
- with ordinary names */
- dicq[-1] = ' ',
- *dicq++ = '\0';
- return(identifier(0)); }
- if('0'<=c&&c<='9'||c=='.'&&peekdig())
- { if(c=='0'&&tolower(peekch())=='x')
- hexnumeral(); else /* added 21.11.2013 */
- if(c=='0'&&tolower(peekch())=='o')
- getch(),c=getch(),octnumeral(); /* added 21.11.2013 */
- else numeral();
- return(CONST); }
- if(c=='%'&&!commandmode)return(directive());
- if(c=='\'')
- { c = getch();
- yylval= getlitch();
- if(yylval<0){ errclass(yylval,0); return CONST; }
- if(!is_char(yylval))
- printf("%simpossible event while reading char const ('\\%u\')\n",
- echoing?"\n":"",yylval),
- acterror();
- if(rawch=='\n'||c!='\'')syntax("improperly terminated char const\n");
- else c= getch();
- return(CONST); }
- if(inexplist&&(c=='\"'||c=='<'))
- { if(!pathname())syntax("badly formed pathname in %export list\n");
- else exportfiles=strcons(addextn(1,dicp),exportfiles),
- keep(dicp);
- return(PATHNAME); }
- if(inlex==1&&c=='`')
- { return(charclass()?ANTICHARCLASS:CHARCLASS); }
- if(c=='\"')
- { string();
- if(yylval==NIL)yylval=NILS; /* to help typechecker! */
- return(CONST); }
- if(inbnf==2) /* fiddle to offside rule in grammars */
- if(c=='[')brct++; else if(c==']')brct--; else
- if(c=='|'&&brct==0)
- return(OFFSIDE);
- if(c==EOF)
- { if(tl[fileq]==NIL&&margstack!=NIL)return(OFFSIDE); /* to fix dtbug */
- fclose((FILE *)hd[hd[fileq]]);
- fileq= tl[fileq]; insertdepth--;
- if(fileq!=NIL&&hd[echostack])
- { if(literate)putchar('>'),spaces(lverge);
- printf("<end of insert>"); }
- s_in= fileq==NIL?stdin:(FILE *)hd[hd[fileq]];
- c= ' ';
- if(fileq==NIL)
- { lverge=c=col=lmargin=0;
- /* c=0; necessary because YACC sometimes reads 1 token past END */
- atnl=1;
- echoing=verbosity&listing;
- lastline=line_no;
- /* hack so errline can be set right if err at end of file */
- line_no=0;
- litmain=literate=0;
- return(END); }
- else { current_file = tl[hd[fileq]];
- prefix=hd[prefixstack];
- prefixstack=tl[prefixstack];
- echoing=hd[echostack];
- echostack=tl[echostack];
- lverge=hd[vergstack];
- vergstack=tl[vergstack];
- literate=hd[litstack];
- litstack=tl[litstack];
- line_no=hd[linostack];
- linostack=tl[linostack]; }
- return(yylex()); }
- lastc= c;
- c= getch();
-#define try(x,y) if(c==x){ c=getch(); return(y); }
- switch(lastc) {
- case '_': if(c=='') /* underlined something */
- { c=getch();
- if(c=='<'){ c=getch(); return(LE); }
- if(c=='>'){ c=getch(); return(GE); }
- if(c=='%'&&!commandmode)return(directive());
- if(isletter(c)) /* underlined reserved word */
- { kollect(okulid);
- if(dicp[1]=='_'&&dicp[2]=='')
- return(identifier(1)); }
- syntax("illegal use of underlining\n");
- return('_'); }
- return(lastc);
- case '-': try('>',ARROW) try('-',MINUSMINUS) return(lastc);
- case '<': try('-',LEFTARROW) try('=',LE) return(lastc);
- case '=': if(c=='>'){ syntax("unexpected symbol =>\n"); return '='; }
- try('=',EQEQ) return(lastc);
- case '+': try('+',PLUSPLUS) return(lastc);
- case '.': if(c=='.')
- { c=getch();
- return(DOTDOT);
- }
- return(lastc);
- case '\\': try('/',VEL) return(lastc);
- case '>': try('=',GE) return(lastc);
- case '~': try('=',NE) return(lastc);
- case '&': if(c=='>')
- { c=getch();
- if(c=='>')yylval=1;
- else yylval=0,ungetc(c,s_in);
- c=' ';
- return(TO); }
- return(lastc);
- case '/': try('/',DIAG) return(lastc);
- case '*': try('*',collectstars()) return(lastc);
- case ':': if(c==':')
- { c=getch();
- if(c=='='){ c=getch(); return(COLON2EQ); }
- else return(COLONCOLON);
- }
- return(lastc);
- case '$': if(
- /* c=='_'&&okid(peekch())|| /* _id/_ID as id */
- isletter(c))
- { word t;
- kollect(okid);
- t=identifier(0);
- return(t==NAME?INFIXNAME:t==CNAME?INFIXCNAME:'$'); }
- /* the last alternative is an error - caveat */
- if('1'<=c&&c<='9')
- { word n=0;
- while(isdigit(c)&&n<1e6)n=10*n+c-'0',c=getch();
- if(n>sreds)
- /* sreds==0 everywhere except in semantic redn clause */
- printf("%ssyntax error: illegal symbol $%d%s\n",
- echoing?"\n":"",n,n>=1e6?"...":""),
- acterror();
- else { yylval=mkgvar(n); return(NAME); }
- }
- if(c=='-')
- { if(!compiling)
- syntax("unexpected symbol $-\n"); else
- {c=getch(); yylval=common_stdin; return(CONST); }}
- /* NB we disallow recursive use of $($/+/-) inside $+ data
- whence addition of `compiling' to premises */
- if(c==':')
- { c=getch();
- if(c!='-')syntax("unexpected symbol $:\n"); else
- { if(!compiling)
- syntax("unexpected symbol $:-\n"); else
- {c=getch(); yylval=common_stdinb; return(CONST); }}} /* $:- */
- if(c=='+')
- { /* if(!(commandmode&&compiling||magic))
- syntax("unexpected symbol $+\n"); else /* disallow in scripts */
- if(!compiling)
- syntax("unexpected symbol $+\n"); else
- { c=getch();
- if(commandmode)
- yylval=cook_stdin;
- else yylval=ap(readvals(0,0),OFFSIDE);
- return(CONST); }}
- if(c=='$')
- { if(!(inlex==2||commandmode&&compiling))
- syntax("unexpected symbol $$\n"); else
- { c=getch();
- if(inlex) { yylval=mklexvar(0); return(NAME); }
- else return(DOLLAR2); }}
- if(c=='#')
- { if(inlex!=2)syntax("unexpected symbol $#\n"); else
- { c=getch(); yylval=mklexvar(1); return(NAME); }}
- if(c=='*')
- { c=getch(); yylval=ap(GETARGS,0); return(CONST); }
- if(c=='0')
- syntax("illegal symbol $0\n");
- default: return(lastc);
-}}
-
-layout()
-{L:while(c==' '||c=='\n'&&!commandmode||c=='\t') c= getch();
- if(c==EOF&&commandmode){ c='\n'; return; }
- if(c=='|'&&peekch()=='|' /* ||comments */
- || col==1&&line_no==1 /* added 19.11.2013 */
- &&c=='#'&&peekch()=='!') /* UNIX magic string */
- { while((c=getch())!='\n'&&c!=EOF);
- if(c==EOF&&!commandmode)return;
- c= '\n';
- goto L; }
-}
-
-collectstars()
-{ word n=2;
- while(c=='*')c=getch(),n++;
- yylval= mktvar(n);
- return(TYPEVAR);
-}
-
-word gvars=NIL; /* list of grammar variables - no need to reset */
-
-mkgvar(i) /* make bound variable (corresponding to $i in bnf rule) */
-word i;
-{ word *p= &gvars;
- while(--i)
- { if(*p==NIL)*p=cons(sto_id("gvar"),NIL);
- p= &tl[*p]; }
- if(*p==NIL)*p=cons(sto_id("gvar"),NIL);
- return(hd[*p]);
-} /* all these variables have the same name, and are not in hashbucket */
-
-word lexvar=0;
-
-mklexvar(i) /* similar - corresponds to $$, $# on rhs of %lex rule */
-word i; /* i=0 or 1 */
-{ extern word ltchar;
- if(!lexvar)
- lexvar=cons(sto_id("lexvar"),sto_id("lexvar")),
- id_type(hd[lexvar])=ltchar,
- id_type(tl[lexvar])=genlstat_t();
- return(i?tl[lexvar]:hd[lexvar]);
-}
-
-word ARGC;
-char **ARGV; /* initialised in main(), see steer.c */
-
-conv_args() /* used to give access to command line args
- see case GETARGS in reduce.c */
-{ word i=ARGC,x=NIL;
- if(i==0)return(NIL); /* possible only if not invoked from a magic script */
- { while(--i)x=cons(str_conv(ARGV[i]),x);
- x=cons(str_conv(ARGV[0]),x); }
- return(x);
-}
-
-str_conv(s) /* convert C string to Miranda form */
-char *s;
-{ word x=NIL,i=strlen(s);
- while(i--)x=cons(s[i],x);
- return(x);
-} /* opposite of getstring() - see reduce.c */
-
-okpath(ch)
-word ch;
-{ return(ch!='\"'&&ch!='\n'&&ch!='>'); }
-
-char *pathname() /* returns NULL if not valid pathname (in string quotes) */
-{ layout();
- if(c=='<') /* alternative quotes <..> for system libraries */
- { extern char *miralib;
- char *hold=dicp;
- c=getch();
- (void)strcpy(dicp,miralib);
- dicp+=strlen(miralib);
- *dicp++ = '/';
- kollect(okpath);
- dicp=hold;
- if(c!='>')return(NULL);
- c=' ';
- return(dicp); }
- if(c!='\"')return(NULL);
- c=getch();
- if(c=='~')
- { char *h,*hold=dicp;
- extern char linebuf[];
- *dicp++ = c;
- c=getch();
- while(isalnum(c)||c=='-'||c=='_'||c=='.')
- *dicp++ = c, c=getch();
- *dicp='\0';
- if(h=gethome(hold+1))
- (void)strcpy(hold,h),dicp=hold+strlen(hold);
- else (void)strcpy(&linebuf[0],hold),
- (void)strcpy(hold,prefixbase+prefix),
- dicp=hold+strlen(prefixbase+prefix),
- (void)strcpy(dicp,&linebuf[0]),
- dicp+=strlen(dicp);
- kollect(okpath);
- dicp=hold;
- } else
- if(c=='/') /* absolute pathname */
- kollect(okpath);
- else { /* relative pathname */
- char *hold=dicp;
- (void)strcpy(dicp,prefixbase+prefix);
- dicp+=strlen(prefixbase+prefix);
- kollect(okpath);
- dicp=hold; }
- if(c!='\"')return(NULL);
- c = ' ';
- return(dicp);
-} /* result is volatile - call keep(dicp) to retain */
-
-adjust_prefix(f) /* called at %insert and at loadfile, to get static pathname
- resolution */
-char *f;
-{ /* the directory part of the pathname f becomes the new
- prefix for pathnames, and we stack the current prefix */
- char *g;
- prefixstack=strcons(prefix,prefixstack);
- prefix += strlen(prefixbase+prefix)+1;
- while(prefix+strlen(f)>=prefixlimit) /* check and fix overflow */
- prefixlimit += 1024, prefixbase=realloc(prefixbase,prefixlimit);
- (void)strcpy(prefixbase+prefix,f);
- g=rindex(prefixbase+prefix,'/');
- if(g)g[1]='\0';
- else prefixbase[prefix]='\0';
-}
-
-/* NOTES on how static pathname resolution is achieved:
-(the specification is that pathnames must always be resolved relative to the
-file in which they are encountered)
-Definition -- the 'prefix' of a pathname is the initial segment up to but not
-including the last occurrence of '/' (null if no '/' present).
-Keep the wd constant during compilation. Have a global char* prefix, initially
-null.
-1) Whenever you read a relative pathname(), insert 'prefix' on the front of it.
-2) On entering a new level of insert, stack old prefix and prefix becomes that
- of new file name. Done by calling adjust_prefix().
-3) On quitting a level of insert, unstack old prefix.
-*/
-
-peekdig()
-{ word ch = getc(s_in);
- ungetc(ch,s_in);
- return('0'<=ch&&ch<='9');
-}
-
-peekch()
-{ word ch = getc(s_in);
- ungetc(ch,s_in);
- return(ch);
-}
-
-openfile(n) /* returns 0 or 1 as indication of success - puts file on fileq
- if successful */
-char *n;
-{ FILE *f;
- f= fopen(n,"r");
- if(f==NULL)return(0);
- fileq= cons(strcons(f,NIL),fileq);
- insertdepth++;
- return(1);
-}
-
-identifier(s) /* recognises reserved words */
-word s; /* flags looking for ul reserved words only */
-{ extern word lastid,initialising;
- if(inbnf==1)
- { /* only reserved nonterminals are `empty', `end', `error', `where' */
- if(is("empty ")||is("e_m_p_t_y"))return(EMPTYSY); else
- if(is("end ")||is("e_n_d"))return(ENDSY); else
- if(is("error ")||is("e_r_r_o_r"))return(ERRORSY); else
- if(is("where ")||is("w_h_e_r_e"))return(WHERE); }
- else
- switch(dicp[0])
- { case 'a': if(is("abstype")||is("a_b_s_t_y_p_e"))
- return(ABSTYPE);
- break;
- case 'd': if(is("div")||is("d_i_v"))
- return(DIV);
- break;
- case 'F': if(is("False")) /* True, False alleged to be predefined, not
- reserved (??) */
- { yylval = False;
- return(CONST); }
- break;
- case 'i': if(is("if")||is("i_f"))
- return(IF);
- break;
- case 'm': if(is("mod")||is("m_o_d"))
- return(REM);
- break;
- case 'o': if(is("otherwise")||is("o_t_h_e_r_w_i_s_e"))
- return(OTHERWISE);
- break;
- case 'r': if(is("readvals")||is("r_e_a_d_v_a_l_s"))
- return(READVALSY);
- break;
- case 's': if(is("show")||is("s_h_o_w"))
- return(SHOWSYM);
- break;
- case 'T': if(is("True"))
- { yylval = True;
- return(CONST); }
- case 't': if(is("type")||is("t_y_p_e"))
- return(TYPE);
- break;
- case 'w': if(is("where")||is("w_h_e_r_e"))
- return(WHERE);
- if(is("with")||is("w_i_t_h"))
- return(WITH);
- break;
- }
- if(s){ syntax("illegal use of underlining\n"); return('_'); }
- yylval=name(); /* not a reserved word */
- if(commandmode&&lastid==0&&id_type(yylval)!=undef_t)lastid=yylval;
- return(isconstructor(yylval)?CNAME:NAME);
-}
-
-word disgusting=0; /* flag to turn off typecheck, temporary hack for jrc */
-
-directive() /* these are of the form "%identifier" */
-{ extern word SYNERR,magic;
- word holdcol=col-1,holdlin=line_no;
- c = getch();
- if(c=='%'){ c=getch(); return(ENDIR); }
- kollect(okulid);
- switch(dicp[0]=='_'&&dicp[1]==''?dicp[2]:dicp[0])
- { case 'b': if(is("begin")||is("_^Hb_^He_^Hg_^Hi_^Hn"))
- if(inlex)
- return(LBEGIN);
- if(is("bnf")||is("_^Hb_^Hn_^Hf"))
- { setlmargin(); col=holdcol+4;
- /* `indent' to right hand end of directive */
- return(BNF); }
- break;
- case 'e': if(is("export")||is("_e_x_p_o_r_t"))
- { if(magic)syntax(
- "%export directive not permitted in \"-exp\" script\n");
- return(EXPORT); }
- break;
- case 'f': if(is("free")||is("_f_r_e_e"))
- { if(magic)syntax(
- "%free directive not permitted in \"-exp\" script\n");
- return(FREE); }
- break;
- case 'i': if(is("include")||is("_i_n_c_l_u_d_e"))
- { if(!SYNERR){ layout(); setlmargin(); }
- /* does `indent' for grammar */
- if(!pathname())
- syntax("bad pathname after %include\n");
- else yylval=strcons(addextn(1,dicp),
- fileinfo(get_fil(current_file),holdlin)),
- /* (includee,hereinfo) */
- keep(dicp);
- return(INCLUDE); }
- if(is("insert")||is("_i_n_s_e_r_t"))
- { char *f=pathname();
- if(!f)syntax("bad pathname after %insert\n"); else
- if(insertdepth<12&&openfile(f))
- { adjust_prefix(f);
- vergstack=cons(lverge,vergstack);
- echostack=cons(echoing,echostack);
- litstack=cons(literate,litstack);
- linostack=strcons(line_no,linostack);
- line_no=0; atnl=1; /* was line_no=1; */
- keep(dicp);
- current_file = make_fil(f,fm_time(f),0,NIL);
- files = append1(files,cons(current_file,NIL));
- tl[hd[fileq]] = current_file;
- s_in = (FILE *)hd[hd[fileq]];
- literate= peekch()=='>'||litname(f);
- col=lverge=holdcol;
- if(echoing)
- { putchar('\n');
- if(!literate)
- if(litmain)putchar('>'),spaces(holdcol);
- else spaces(holdcol); }
- c = getch(); } /* used to precede previous cmd when echo
- was delayed by one char, see getch() */
- else { word toomany=(insertdepth>=12);
- printf("%s%%insert error - cannot open \"%s\"\n",
- echoing?"\n":"",f);
- keep(dicp);
- if(toomany)printf(
- "too many nested %%insert directives (limit=%d)\n",
- insertdepth);
- else
- files = append1(files,cons(make_fil(f,0,0,NIL),NIL));
- /* line above for benefit of `oldfiles' */
- acterror(); }
- return(yylex()); }
- break;
- case 'l': if(is("lex")||is("_^Hl_^He_^Hx"))
- { if(inlex)syntax("nested %lex not permitted\n");
- /* due to use of global vars inlex, lexdefs */
- return(LEX); }
- if(is("list")||is("_l_i_s_t"))
- { echoing=verbosity; return(yylex()); }
- break;
- case 'n': if(is("nolist")||is("_n_o_l_i_s_t"))
- { echoing=0; return(yylex()); }
- break;
- }
- if(echoing)putchar('\n');
- printf("syntax error: unknown directive \"%%%s\"\n",dicp),
- acterror();
- return(END);
-}
-
-okid(ch)
-word ch;
-{ return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9'
- ||ch=='_'||ch=='\''); }
-
-okulid(ch)
-word ch;
-{ return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9'
- ||ch=='_'||ch==''||ch=='\''); }
-
-kollect(f)
-/* note top of dictionary used as work space to collect current token */
-word (*f)();
-{ dicq= dicp;
- while((*f)(c)){ *dicq++ = c; c= getch(); }
- *dicq++ = '\0';
- ovflocheck;
-}
-
-char *keep(p) /* call this to retain volatile string for later use */
-char *p;
-{ if(p==dicp)dicp= dicq;
- else (void)strcpy(dicp,p),
- p=dicp,
- dicp=dicq=dicp+strlen(dicp)+1,
- dic_check();
- return(p);
-}
-
-dic_check() /* called from REDUCE */
-{ ovflocheck; }
-
-numeral()
-{ word nflag=1;
- dicq= dicp;
- while(isdigit(c))
- *dicq++ = c, c=getch();
- if(c=='.'&&peekdig())
- { *dicq++ = c, c=getch(); nflag=0;
- while(isdigit(c))
- *dicq++ = c, c=getch(); }
- if(c=='e')
- { word np=0;
- *dicq++ = c, c=getch(); nflag=0;
- if(c=='+')c=getch(); else /* ignore + before exponent */
- if(c=='-')*dicq++ = c, c=getch();
- if(!isdigit(c)) /* e must be followed by some digits */
- syntax("badly formed floating point number\n");
- while(c=='0')
- *dicq++ = c, c=getch();
- while(isdigit(c))
- np++, *dicq++ = c, c=getch();
- if(!nflag&&np>3) /* scanf falls over with silly exponents */
- { syntax("floating point number out of range\n");
- return; }
- }
- ovflocheck;
- if(nflag) /* `.' or `e' makes fractional */
- *dicq = '\0',
- yylval= bigscan(dicp); else
- { double r=0.0;
- if(dicq-dicp>60) /* this allows 59 chars */
- /* scanf crashes, on VAX, gives wrong answers, on ORION 1/05 */
- { syntax("illegal floating point constant (too many digits)\n");
- return; }
- errno=0;
- *dicq = '\n';
- sscanf(dicp,"%lf",&r);
- if(errno)fpe_error(); else
- yylval= sto_dbl((double)r); }
-}
-
-hexnumeral() /* added 21.11.2013 */
-{ extern word errno;
- word nflag=1;
- dicq= dicp;
- *dicq++ = c, c=getch(); /* 0 */
- *dicq++ = c, c=getch(); /* x */
- if(!isxdigit(c)&&c!='.')syntax("malformed hex number\n");
- while(c=='0'&&isxdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */
- while(isxdigit(c))
- *dicq++ = c, c=getch();
- ovflocheck;
- if(c=='.'||tolower(c)=='p') /* hex float, added 20.11.19 */
- { double d;
- if(c=='.')
- { *dicq++ = c, c=getch();
- while(isxdigit(c))
- *dicq++ = c, c=getch(); }
- if(c=='p')
- { *dicq++ = c, c=getch();
- if(c=='+'||c=='-')*dicq++ = c, c=getch();
- if(!isdigit(c))syntax("malformed hex float\n");
- while(isdigit(c))
- *dicq++ = c, c=getch(); }
- ovflocheck;
- *dicq='\0';
- if(dicq-dicp>60||sscanf(dicp,"%lf",&d)!=1)
- syntax("malformed hex float\n");
- else yylval= sto_dbl(d);
- return; }
- *dicq = '\0';
- yylval= bigxscan(dicp+2,dicq);
-}
-
-octnumeral() /* added 21.11.2013 */
-{ extern word errno;
- word nflag=1;
- dicq= dicp;
- if(!isdigit(c))syntax("malformed octal number\n");
- while(c=='0'&&isdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */
- while(isdigit(c)&&c<='7')
- *dicq++ = c, c=getch();
- if(isdigit(c))syntax("illegal digit in octal number\n");
- ovflocheck;
- *dicq = '\0';
- yylval= bigoscan(dicp,dicq);
-}
-
-word namebucket[128]; /* each namebucket has a list terminated by 0, not NIL */
-
-hash(s) /* returns a value in {0..127} */
-char *s;
-{ word h = *s;
- if(h)while(*++s)h ^= *s; /* guard necessary to deal with s empty */
- return(h&127);
-}
-
-isconstrname(s)
-char *s;
-{ if(s[0]=='$')s++;
- return isupper(*s); /* formerly !islower */
-}
-
-getfname(x)
-/* nonterminals have an added ' ', getfname returns the corresponding
- function name */
-word x;
-{ char *p = get_id(x);
- dicq= dicp;
- while(*dicq++ = *p++);
- if(dicq-dicp<3)fprintf(stderr,"impossible event in getfname\n"),exit(1);
- dicq[-2] = '\0'; /* overwrite last char */
- ovflocheck;
- return(name());
-}
-
-isnonterminal(x)
-word x;
-{ char *n;
- if(tag[x]!=ID)return(0);
- n = get_id(x);
- return(n[strlen(n)-1]==' ');
-}
-
-name()
-{ word q,h;
- q= namebucket[h=hash(dicp)];
- while(q&&!is(get_id(hd[q])))q= tl[q];
- if(q==0)
- { q = sto_id(dicp);
- namebucket[h] = cons(q,namebucket[h]);
- keep(dicp); }
- else q= hd[q];
- return(q); }
-/* note - keeping buckets sorted didn't seem to help (if anything slightly
- slower) probably because ordering only relevant if name not present, and
- outweighed by increased complexity of loop */
-
-static word inprelude=1;
-
-make_id(n) /* used in mira_setup(), primdef(), predef(), all in steer.c */
-char *n;
-{ word x,h;
- h=hash(n);
- x = sto_id(inprelude?keep(n):n);
- namebucket[h] = cons(x,namebucket[h]);
- return(x); }
-
-findid(n) /* like name() but returns NIL rather than create new id */
-char *n;
-{ word q;
- q= namebucket[hash(n)];
- while(q&&!strcmp(n,get_id(hd[q]))==0)q= tl[q];
- return(q?hd[q]:NIL); }
-
-word *pnvec=0,nextpn,pn_lim=200; /* private name vector */
-
-reset_pns() /* (re)initialise private name space */
-{ nextpn=0;
- if(!pnvec)
- { pnvec=(word *)malloc(pn_lim*sizeof(word));
- if(pnvec==NULL)mallocfail("pnvec"); }
-}
-
-make_pn(val) /* create new private name with value val */
-word val;
-{ if(nextpn==pn_lim)
- { pn_lim+=400;
- pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word));
- if(pnvec==NULL)mallocfail("pnvec"); }
- pnvec[nextpn]=strcons(nextpn,val);
- return(pnvec[nextpn++]);
-}
-
-sto_pn(n) /* return n'th private name, extending pnvec if necessary */
-word n;
-{ if(n>=pn_lim)
- { while(pn_lim<=n)pn_lim+=400;
- pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word));
- if(pnvec==NULL)mallocfail("pnvec"); }
- while(nextpn<=n) /* NB allocates all missing names upto and including nth*/
- pnvec[nextpn]=strcons(nextpn,UNDEF),nextpn++;
- return(pnvec[n]);
-}
-
-mkprivate(x) /* disguise identifiers prior to removal from environment */
-word x; /* used in setting up prelude - see main() in steer.c */
-{ while(x!=NIL)
- { char *s = get_id(hd[x]);
- get_id(hd[x])[0] += 128; /* hack to make private internal name */
- x = tl[x]; } /* NB - doesn't change hashbucket */
- inprelude=0;
-}
-
-word sl=100;
-
-string()
-{ word p;
- word ch,badch=0;
- c = getch();
- ch= getlitch();
- p= yylval= cons(NIL,NIL);
- while(ch!=EOF&&rawch!='\"'&&rawch!='\n')
- if(ch==-7) ch=getlitch(); else /* skip \& */
- if(ch<0){ badch=ch; break; }
- else { p= tl[p]= cons(ch,NIL);
- ch= getlitch(); }
- yylval= tl[yylval];
- if(badch)errclass(badch,1);
- if(rawch=='\n')
- syntax("non-escaped newline encountered inside string quotes\n"); else
- if(ch==EOF)
- { if(echoing)putchar('\n');
- printf("syntax error: script ends inside unclosed string quotes - \n");
- printf(" \"");
- while(yylval!=NIL&& sl-- )
- { putchar(hd[yylval]);
- yylval= tl[yylval]; }
- printf("...\"\n");
- acterror(); }
-}
-
-charclass()
-{ word p;
- word ch,badch=0,anti=0;
- c = getch();
- if(c=='^')anti=1,c=getch();
- ch= getlitch();
- p= yylval= cons(NIL,NIL);
- while(ch!=EOF&&rawch!='`'&&rawch!='\n')
- if(ch==-7)ch=getlitch(); else /* skip \& */
- if(ch<0){ badch=ch; break; }
- else { if(rawch=='-'&&hd[p]!=NIL&&hd[p]!=DOTDOT)
- ch=DOTDOT; /* non-initial, non-escaped '-' */
- p= tl[p]= cons(ch,NIL);
- ch= getlitch(); }
- if(hd[p]==DOTDOT)hd[p]='-'; /* naturalise a trailing '-' */
- for(p=yylval;tl[p]!=NIL;p=tl[p]) /* move each DOTDOT to front of range */
- if(hd[tl[p]]==DOTDOT)
- { hd[tl[p]]=hd[p],hd[p]=DOTDOT;
- if(hd[tl[p]]>=hd[tl[tl[p]]])
- syntax("illegal use of '-' in [charclass]\n");
- }
- yylval= tl[yylval];
- if(badch)errclass(badch,2);
- if(rawch=='\n')
- syntax("non-escaped newline encountered in char class\n"); else
- if(ch==EOF)
- { if(echoing)putchar('\n');
- printf(
- "syntax error: script ends inside unclosed char class brackets - \n");
- printf(" [");
- while(yylval!=NIL&& sl-- )
- { putchar(hd[yylval]);
- yylval= tl[yylval]; }
- printf("...]\n");
- acterror(); }
- return(anti);
-}
-
-reset_lex() /* called after an error */
-{ extern word errs,errline;
- extern char *current_script;
- /*printf("reset_lex()\n"); /* DEBUG */
- if(!commandmode)
- { if(!errs)errs=fileinfo(get_fil(current_file),line_no);
- /* convention, if errs set contains location of error, otherwise pick up
- from current_file and line_no */
- if(tl[errs]==0&&(char *)hd[errs]==current_script)
- /* at end of file, so line_no has been reset to 0 */
- printf("error occurs at end of ");
- else printf("error found near line %d of ",tl[errs]);
- printf("%sfile \"%s\"\ncompilation abandoned\n",
- (char *)hd[errs]==current_script?"":"%insert ",
- (char *)hd[errs]);
- if((char *)hd[errs]==current_script)
- errline=tl[errs]==0?lastline:tl[errs],errs=0;
- else { while(tl[linostack]!=NIL)linostack=tl[linostack];
- errline=hd[linostack]; }
- /* tells editor where to find error - errline contains location of 1st
- error in main script, errs is hereinfo of upto one error in %insert
- script (each is 0 if not set) - some errors can set both */
- }
- reset_state();
-}
-
-reset_state() /* reset all global variables used by compiler */
-{ extern word TABSTRS,SGC,newtyps,algshfns,showchain,inexplist,sreds,
- rv_script,idsused;
- /* printf("reset_state()\n"); /* DEBUG */
- if(commandmode)
- while(c!='\n'&&c!=EOF)c=getc(s_in); /* no echo */
- while(fileq!=NIL)fclose((FILE *)hd[hd[fileq]]),fileq=tl[fileq];
- insertdepth= -1;
- s_in=stdin;
- echostack=idsused=prefixstack=litstack=linostack=vergstack
- =margstack=NIL;
- prefix=0; prefixbase[0]='\0';
- echoing=verbosity&listing;
- brct=inbnf=sreds=inlex=inexplist=commandmode=lverge=col=lmargin=0;
- atnl=1;
- rv_script=0;
- algshfns=newtyps=showchain=SGC=TABSTRS=NIL;
- c=' ';
- line_no=0;
- litmain=literate=0;
- /* printf("exit reset_state()\n"); /* DEBUG */
-}
-
-/* end of MIRANDA LEX ANALYSER */
-