/* MIRANDA LEX ANALYSER */ /************************************************************************** * Copyright (C) Research Software Limited 1985-90. All rights reserved. * * The Miranda system is distributed as free software under the terms in * * the file "COPYING" which is included in the distribution. * * * * Revised to C11 standard and made 64bit compatible, January 2020 * *------------------------------------------------------------------------*/ #include "data.h" #include "lex.h" #include "big.h" #include <errno.h> static int charclass(void); static void chblank(char *); static int collectstars(void); static word directive(void); static void hexnumeral(void); static int identifier(int); static void kollect(int(*f)()); static void numeral(void); static void octnumeral(void); static int peekch(void); static int peekdig(void); static void string(void); extern word DICSPACE; /* see steer.c for default value */ /* capacity in chars of dictionary space for storing identifiers and file names to get a larger name space just increase this number */ extern FILE *s_in; extern word echoing,listing,verbosity,magic,inbnf,inlex; word fileq=NIL; /* list of currently open-for-input files, of form cons(strcons(stream,<ptr to element of 'files'>),...)*/ word insertdepth= -1,margstack=NIL,col=0,lmargin=0; word echostack=NIL; word lverge=0,vergstack=NIL; char *prefixbase; /* stores prefixes for pathnames, to get static resolution */ word prefixlimit=1024; /* initial size of space for prefixes */ word prefix,prefixstack=NIL; /* current prefix, stack of old prefixes */ word atnl=1,line_no=0; word lastline; word litstack=NIL,linostack=NIL; word c=' ', lastc; extern word commandmode; word common_stdin,common_stdinb,cook_stdin; word litmain=0,literate=0; /* flags "literate" comment convention */ char *dic,*dicp,*dicq; char *pathname(); void setupdic() { dicp=dicq=dic=malloc(DICSPACE); if(dic==NULL)mallocfail("dictionary"); /* it is not permissible to realloc dic, because at the moment identifiers etc. contain absolute pointers into the dictionary space - so we must choose fairly large initial value for DICSPACE. Fix this later */ prefixbase=malloc(prefixlimit); prefixbase[0]='\0'; prefix=0; } /* this allows ~login convention in filenames */ /* #define okgetpwnam /* suppress 26.5.06 getpwnam causes runtime error when statically linked (Linux) */ #ifdef okgetpwnam #include <pwd.h> struct passwd *getpwnam(); #endif char *getenv(); char *gethome(n) /* for expanding leading `~' in tokens and pathnames */ char *n; { struct passwd *pw; if(n[0]=='\0')return(getenv("HOME")); #ifdef okgetpwnam if(pw=getpwnam(n))return(pw->pw_dir); #endif return(NULL); } #define ovflocheck if(dicq-dic>DICSPACE)dicovflo() void dicovflo() /* is this called everywhere it should be? Check later */ { fprintf(stderr,"\npanic: dictionary overflow\n"); exit(1); } char *token() /* lex analyser for command language (very simple) */ { extern char *current_script; word ch=getchar(); dicq = dicp; /* uses top of dictionary as temporary work space */ while(ch==' '||ch=='\t')ch=getchar(); if(ch=='~') { char *h; *dicq++ = ch; ch=getchar(); while(isalnum(ch)||ch=='-'||ch=='_'||ch=='.') *dicq++ = ch,ch=getchar(); /* NB csh does not allow `.' in user ids when expanding `~' but this may be a mistake */ *dicq='\0'; if(h=gethome(dicp+1)) (void)strcpy(dicp,h),dicq=dicp+strlen(dicp); } #ifdef SPACEINFILENAMES if(ch!='"'&&ch!='<') /* test added 9.5.06 see else part */ #endif while(!isspace(ch)&&ch!=EOF) { *dicq++ = ch; if(ch=='%') if(dicq[-2]=='\\')(--dicq)[-1]='%'; else dicq--,(void)strcpy(dicq,current_script),dicq+=strlen(dicq); ch=getchar(); } #ifdef SPACEINFILENAMES else { word closeq= ch=='<'?'>':'"'; /* this branch added 9.5.06 */ *dicq++ = ch; /* to allow spaces in "tok" or <tok> */ ch=getchar(); while(ch!=closeq&&ch!='\n'&&ch!=EOF) *dicq++ = ch, ch=getchar(); if(ch==closeq)*dicq++ = ch, ch=getchar(); } #endif *dicq++ = '\0'; ovflocheck; while(ch==' '||ch=='\t')ch=getchar(); ungetc(ch,stdin); return(*dicp=='\0'?(char *)NULL:dicp); } /* NB - if no token returns NULL rather than pointer to empty string */ char *addextn(b,s) /* if(b)force s to end in ".m", and resolve <quotes> */ word b; char *s; { extern char *miralib; extern char linebuf[]; word n=strlen(s); /* printf("addextn(%s)\n",s); /* DEBUG */ if(s[0]=='<'&&s[n-1]=='>') { static int miralen=0; /* code to handle quotes added 21/1/87 */ if(!miralen)miralen=strlen(miralib); strcpy(linebuf,miralib); linebuf[miralen]= '/'; strcpy(linebuf+miralen+1,s+1); strcpy(dicp,linebuf); s=dicp; n=n+miralen-1; dicq=dicp+n+1; dicq[-1] = '\0'; /* overwrites '>' */ ovflocheck; } else if(s[0]=='\"'&&s[n-1]=='\"') { /*strip quotes */ dicq=dicp; s++; while(*s)*dicq++ = *s++; dicq[-1]='\0'; /* overwrites '"' */ s=dicp; n=n-2; } if(!b||strcmp(s+n-2,".m")==0)return(s); if(s==dicp)dicq--;/*if s in scratch area at top of dic, extend in situ*/ else { /* otherwise build new copy at top of dic */ dicq=dicp; while(*s)*dicq++ = *s++; *dicq = '\0'; } if(strcmp(dicq-2,".x")==0)dicq -= 2; else if(dicq[-1]=='.')dicq -= 1; (void)strcpy(dicq,".m"); dicq += 3; ovflocheck; /* printf("return(%s)\n",dicp); /* DEBUG */ return(dicp); } /* NB - call keep(dicp) if the result is to be retained */ word brct=0; void spaces(n) word n; { while(n-- >0)putchar(' '); } int litname(s) char *s; { word n=strlen(s); return(n>=6 && strcmp(s+n-6,".lit.m")==0); } int getch() /* keeps track of current position in the variable "col"(column) */ { word ch= getc(s_in); if(ch==EOF&&!atnl&&tl[fileq]==NIL) /* badly terminated top level file */ { atnl=1; return('\n'); } if(atnl) { if((line_no==0&&!commandmode||magic&&line_no==1)&&litstack==NIL) litmain=literate= (ch=='>')||litname(get_fil(current_file)); if(literate) { word i=0; while(ch!=EOF&&ch!='>') { ungetc(ch,s_in); line_no++; (void)fgets(dicp,250,s_in); if(i==0&&line_no>1)chblank(dicp); i++; if(echoing)spaces(lverge),fputs(dicp,stdout); ch=getc(s_in); } if((i>1||line_no==1&&i==1)&&ch!=EOF)chblank(dicp); if(ch=='>') { if(echoing)putchar(ch),spaces(lverge);ch=getc(s_in); } } /* supports alternative `literate' comment convention */ atnl=0; col= lverge+literate; if(!commandmode&&ch!=EOF)line_no++; } if(echoing&&ch!=EOF) { putchar(ch); if(ch=='\n'&&!literate) if(litmain)putchar('>'),spaces(lverge); else spaces(lverge); } if(ch=='\t')col= ((col-lverge)/8 + 1)*8+lverge; else col++; if(ch=='\n')atnl= 1; return(ch); } int blankerr=0; void chblank(s) char *s; { while(*s==' '||*s=='\t')s++; if(*s=='\n')return; syntax("formal text not delimited by blank line\n"); blankerr=1; reset(); /* easiest way to recover is to pretend it was an interrupt */ } /* getlitch gets a character from input like getch, but using C escaping conventions if the char is backslash -- for use in reading character and string constants */ int rawch; /* it is often important to know, when certain characters are returned (e.g. quotes and newlines) whether they were escaped or literal */ int errch; /* for reporting unrecognised \escape */ int getlitch() { extern int UTF8; int ch=c; rawch = ch; if(ch=='\n')return(ch); /* always an error */ if(UTF8&&ch>127) { /* UTF-8 uses 2 or 3 bytes for unicode points to 0xffff */ word ch1=c=getch(); if((ch&0xe0)==0xc0) /* 2 bytes */ { if((ch1&0xc0)!=0x80) return -5; /* not valid UTF8 */ c=getch(); return sto_char((ch&0x1f)<<6|ch1&0x3f); } word ch2=c=getch(); if((ch&0xf0)==0xe0) /* 3 bytes */ { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80) return -5; /* not valid UTF8 */ c=getch(); return sto_char((ch&0xf)<<12|(ch1&0x3f)<<6|ch2&0x3f); } word ch3=c=getch(); if((ch&0xf8)==0xf0) /* 4 bytes, beyond basic multiligual plane */ { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80||(ch3&0xc0)!=0x80) return -5; /* not valid UTF8 */ c=getch(); return((ch&7)<<18|(ch1&0x3f)<<12|(ch2&0x3f)<<6|ch3&0x3f); } return(-5); /* not UTF8 */ } if(ch!='\\') { c=getch(); return(ch); } ch = getch(); c = getch(); switch(ch) { case '\n': return(getlitch()); /* escaped nl was handled in 'getch()' */ case 'a': return('\a'); case 'b': return('\b'); case 'f': return('\f'); /* form feed */ case 'n': return('\n'); /* newline, == linefeed */ case 'r': return('\r'); /* carriage return */ case 't': return('\t'); case 'v': return('\v'); case 'X': /* omit for Haskell escape rules, see also lines marked H */ case 'x': if(isxdigit(c)) { int value, N=ch=='x'?4:6; /* N=7 for Haskell escape rules */ char hold[8]; ch = c; int count=0; /* while(ch=='0'&&isxdigit(peekch()))ch=getch(); /* H-lose leading 0s */ while(isxdigit(ch)&&count<N) hold[count++]=ch,ch=getch(); /* read upto N hex digits */ hold[count] = '\0'; sscanf(hold,"%x",&value); c = ch; return value>UMAX?-3 /* \x out of range */ :sto_char(value); } else return -2; /* \x with no hex digits */ default: if('0'<=ch&&ch<='9') { word n=ch-'0',count=1,N=3; /* N=8 for Haskell escape rules */ ch = c; /* while(ch=='0'&&isdigit(peekch()))ch=getch(); /* H-lose leading 0s */ while(isdigit(ch)&&count<N) /* read upto N digits */ { n = 10*n+ch-'0'; count++; ch = getch(); } c = ch; return /* n>UMAX?-4: /* H \decimal out of range */ sto_char(n); } if(ch=='\''||ch=='"'||ch=='\\'||ch=='`')return(ch); /* see note */ if(ch=='&')return -7; /* Haskell null escape, accept silently */ errch=ch<=255?ch:'?'; return -6; /* unrecognised \something */ } } /* note: we accept \` for ` because getlitch() is used by charclass() */ char *rdline() /* used by the "!" command -- see steer.c */ { extern char *current_script; static char linebuf[BUFSIZE]; char *p=linebuf; word ch=getchar(),expansion=0; while(ch==' '||ch=='\t')ch=getchar(); if(ch=='\n'||ch=='!'&&!(*linebuf)) { /* "!!" or "!" on its own means repeat last !command */ if(*linebuf)printf("!%s",linebuf); while(ch!='\n'&&ch!=EOF)ch=getchar(); return(linebuf); } if(ch=='!') expansion=1,p=linebuf+strlen(linebuf)-1; /* p now points at old '\n' */ else ungetc(ch,stdin); while((*p++ =ch=getchar())!='\n'&&ch!=EOF) if(p-linebuf>=BUFSIZE) { *p='\0'; fprintf(stderr,"sorry, !command too long (limit=%d chars): %s...\n", BUFSIZE,linebuf); while((ch=getchar())!='\n'&&ch!=EOF); return(NULL); } else if(p[-1]=='%') if(p>linebuf+1&&p[-2]=='\\')(--p)[-1]='%'; else { (void)strncpy(p-1,current_script,linebuf+BUFSIZE-p); p = linebuf+strlen(linebuf); expansion = 1; } *p = '\0'; if(expansion)printf("!%s",linebuf); return(linebuf); } void setlmargin() /* this and the next routine are used to enforce the offside rule ("yylex" refuses to read a symbol if col<lmargin) */ { margstack= cons(lmargin,margstack); if(lmargin<col)lmargin= col; } /* inner scope region cannot "protrude" */ void unsetlmargin() { if(margstack==NIL)return; /* in case called after `syntax("..")' */ lmargin= hd[margstack]; margstack= tl[margstack]; } int okulid(int); int PREL=1; void errclass(word val, word string) /* diagnose error in charclass, string or char const */ { char *s = string==2?"char class":string?"string":"char const"; if(val==-2)printf("\\x with no xdigits in %s\n",s); else if(val==-3)printf("\\hexadecimal escape out of range in %s\n",s); else if(val==-4)printf("\\decimal escape out of range in %s\n",s); else if(val==-5)printf("unrecognised character in %s" "(UTF8 error)\n",s); else if(val==-6)printf("unrecognised escape \\%c in %s\n",errch,s); else if(val==-7)printf("illegal use of \\& in char const\n"); else printf("unknown error in %s\n",s); acterror(); } word yylex() /* called by YACC to get the next symbol */ { extern word SYNERR,exportfiles,inexplist,sreds; /* SYNERR flags context sensitive syntax error detected in actions */ if(SYNERR)return(END); /* tell YACC to go home */ layout(); if(c=='\n') /* can only occur in command mode */ /* if(magic){ commandmode=0; /* expression just read, now script */ /* line_no=2; /* return(c); } else /* no longer relevant 26.11.2019 */ return(END); if(col<lmargin) if(c=='='&&(margstack==NIL||col>=hd[margstack]))/* && part fixes utah.bug*/ { c = getch(); return(ELSEQ); /* ELSEQ means "OFFSIDE =" */ } else return(OFFSIDE); if(c==';') /* fixes utah2.bug */ { c=getch(); layout(); if(c=='='&&(margstack==NIL||col>=hd[margstack])) { c = getch(); return(ELSEQ); /* ELSEQ means "OFFSIDE =" */ } else return(';'); } if( /* c=='_'&&okid(peekch()) || /* _id/_ID as lowercase id */ isalpha(c)){ kollect(okid); if(inlex==1){ layout(); yylval=name(); return(c=='='?LEXDEF: isconstructor(yylval)?CNAME: NAME); } if(inbnf==1) /* add trailing space to nonterminal to avoid clash with ordinary names */ dicq[-1] = ' ', *dicq++ = '\0'; return(identifier(0)); } if('0'<=c&&c<='9'||c=='.'&&peekdig()) { if(c=='0'&&tolower(peekch())=='x') hexnumeral(); else /* added 21.11.2013 */ if(c=='0'&&tolower(peekch())=='o') getch(),c=getch(),octnumeral(); /* added 21.11.2013 */ else numeral(); return(CONST); } if(c=='%'&&!commandmode)return(directive()); if(c=='\'') { c = getch(); yylval= getlitch(); if(yylval<0){ errclass(yylval,0); return CONST; } if(!is_char(yylval)) printf("%simpossible event while reading char const ('\\%lu\')\n", echoing?"\n":"",yylval), acterror(); if(rawch=='\n'||c!='\'')syntax("improperly terminated char const\n"); else c= getch(); return(CONST); } if(inexplist&&(c=='\"'||c=='<')) { if(!pathname())syntax("badly formed pathname in %export list\n"); else exportfiles=strcons(addextn(1,dicp),exportfiles), keep(dicp); return(PATHNAME); } if(inlex==1&&c=='`') { return(charclass()?ANTICHARCLASS:CHARCLASS); } if(c=='\"') { string(); if(yylval==NIL)yylval=NILS; /* to help typechecker! */ return(CONST); } if(inbnf==2) /* fiddle to offside rule in grammars */ if(c=='[')brct++; else if(c==']')brct--; else if(c=='|'&&brct==0) return(OFFSIDE); if(c==EOF) { if(tl[fileq]==NIL&&margstack!=NIL)return(OFFSIDE); /* to fix dtbug */ fclose((FILE *)hd[hd[fileq]]); fileq= tl[fileq]; insertdepth--; if(fileq!=NIL&&hd[echostack]) { if(literate)putchar('>'),spaces(lverge); printf("<end of insert>"); } s_in= fileq==NIL?stdin:(FILE *)hd[hd[fileq]]; c= ' '; if(fileq==NIL) { lverge=c=col=lmargin=0; /* c=0; necessary because YACC sometimes reads 1 token past END */ atnl=1; echoing=verbosity&listing; lastline=line_no; /* hack so errline can be set right if err at end of file */ line_no=0; litmain=literate=0; return(END); } else { current_file = tl[hd[fileq]]; prefix=hd[prefixstack]; prefixstack=tl[prefixstack]; echoing=hd[echostack]; echostack=tl[echostack]; lverge=hd[vergstack]; vergstack=tl[vergstack]; literate=hd[litstack]; litstack=tl[litstack]; line_no=hd[linostack]; linostack=tl[linostack]; } return(yylex()); } lastc= c; c= getch(); #define try(x,y) if(c==x){ c=getch(); return(y); } switch(lastc) { case '_': if(c=='') /* underlined something */ { c=getch(); if(c=='<'){ c=getch(); return(LE); } if(c=='>'){ c=getch(); return(GE); } if(c=='%'&&!commandmode)return(directive()); if(isalpha(c)) /* underlined reserved word */ { kollect(okulid); if(dicp[1]=='_'&&dicp[2]=='') return(identifier(1)); } syntax("illegal use of underlining\n"); return('_'); } return(lastc); case '-': try('>',ARROW) try('-',MINUSMINUS) return(lastc); case '<': try('-',LEFTARROW) try('=',LE) return(lastc); case '=': if(c=='>'){ syntax("unexpected symbol =>\n"); return '='; } try('=',EQEQ) return(lastc); case '+': try('+',PLUSPLUS) return(lastc); case '.': if(c=='.') { c=getch(); return(DOTDOT); } return(lastc); case '\\': try('/',VEL) return(lastc); case '>': try('=',GE) return(lastc); case '~': try('=',NE) return(lastc); case '&': if(c=='>') { c=getch(); if(c=='>')yylval=1; else yylval=0,ungetc(c,s_in); c=' '; return(TO); } return(lastc); case '/': try('/',DIAG) return(lastc); case '*': try('*',collectstars()) return(lastc); case ':': if(c==':') { c=getch(); if(c=='='){ c=getch(); return(COLON2EQ); } else return(COLONCOLON); } return(lastc); case '$': if( /* c=='_'&&okid(peekch())|| /* _id/_ID as id */ isalpha(c)) { int t; kollect(okid); t=identifier(0); return(t==NAME?INFIXNAME:t==CNAME?INFIXCNAME:'$'); } /* the last alternative is an error - caveat */ if('1'<=c&&c<='9') { int n=0; while(isdigit(c)&&n<1e6)n=10*n+c-'0',c=getch(); if(n>sreds) /* sreds==0 everywhere except in semantic redn clause */ printf("%ssyntax error: illegal symbol $%d%s\n", echoing?"\n":"",n,n>=1e6?"...":""), acterror(); else { yylval=mkgvar(n); return(NAME); } } if(c=='-') { if(!compiling) syntax("unexpected symbol $-\n"); else {c=getch(); yylval=common_stdin; return(CONST); }} /* NB we disallow recursive use of $($/+/-) inside $+ data whence addition of `compiling' to premises */ if(c==':') { c=getch(); if(c!='-')syntax("unexpected symbol $:\n"); else { if(!compiling) syntax("unexpected symbol $:-\n"); else {c=getch(); yylval=common_stdinb; return(CONST); }}} /* $:- */ if(c=='+') { /* if(!(commandmode&&compiling||magic)) syntax("unexpected symbol $+\n"); else /* disallow in scripts */ if(!compiling) syntax("unexpected symbol $+\n"); else { c=getch(); if(commandmode) yylval=cook_stdin; else yylval=ap(readvals(0,0),OFFSIDE); return(CONST); }} if(c=='$') { if(!(inlex==2||commandmode&&compiling)) syntax("unexpected symbol $$\n"); else { c=getch(); if(inlex) { yylval=mklexvar(0); return(NAME); } else return(DOLLAR2); }} if(c=='#') { if(inlex!=2)syntax("unexpected symbol $#\n"); else { c=getch(); yylval=mklexvar(1); return(NAME); }} if(c=='*') { c=getch(); yylval=ap(GETARGS,0); return(CONST); } if(c=='0') syntax("illegal symbol $0\n"); default: return(lastc); }} void layout() {L:while(c==' '||c=='\n'&&!commandmode||c=='\t') c= getch(); if(c==EOF&&commandmode){ c='\n'; return; } if(c=='|'&&peekch()=='|' /* ||comments */ || col==1&&line_no==1 /* added 19.11.2013 */ &&c=='#'&&peekch()=='!') /* UNIX magic string */ { while((c=getch())!='\n'&&c!=EOF); if(c==EOF&&!commandmode)return; c= '\n'; goto L; } } int collectstars() { int n=2; while(c=='*')c=getch(),n++; yylval= mktvar(n); return(TYPEVAR); } word gvars=NIL; /* list of grammar variables - no need to reset */ word mkgvar(i) /* make bound variable (corresponding to $i in bnf rule) */ word i; { word *p= &gvars; while(--i) { if(*p==NIL)*p=cons(sto_id("gvar"),NIL); p= &tl[*p]; } if(*p==NIL)*p=cons(sto_id("gvar"),NIL); return(hd[*p]); } /* all these variables have the same name, and are not in hashbucket */ word lexvar=0; word mklexvar(i) /* similar - corresponds to $$, $# on rhs of %lex rule */ word i; /* i=0 or 1 */ { extern word ltchar; if(!lexvar) lexvar=cons(sto_id("lexvar"),sto_id("lexvar")), id_type(hd[lexvar])=ltchar, id_type(tl[lexvar])=genlstat_t(); return(i?tl[lexvar]:hd[lexvar]); } int ARGC; char **ARGV; /* initialised in main(), see steer.c */ word conv_args() /* used to give access to command line args see case GETARGS in reduce.c */ { word i=ARGC,x=NIL; if(i==0)return(NIL); /* possible only if not invoked from a magic script */ { while(--i)x=cons(str_conv(ARGV[i]),x); x=cons(str_conv(ARGV[0]),x); } return(x); } word str_conv(s) /* convert C string to Miranda form */ char *s; { word x=NIL,i=strlen(s); while(i--)x=cons(s[i],x); return(x); } /* opposite of getstring() - see reduce.c */ int okpath(ch) int ch; { return(ch!='\"'&&ch!='\n'&&ch!='>'); } char *pathname() /* returns NULL if not valid pathname (in string quotes) */ { layout(); if(c=='<') /* alternative quotes <..> for system libraries */ { extern char *miralib; char *hold=dicp; c=getch(); (void)strcpy(dicp,miralib); dicp+=strlen(miralib); *dicp++ = '/'; kollect(okpath); dicp=hold; if(c!='>')return(NULL); c=' '; return(dicp); } if(c!='\"')return(NULL); c=getch(); if(c=='~') { char *h,*hold=dicp; extern char linebuf[]; *dicp++ = c; c=getch(); while(isalnum(c)||c=='-'||c=='_'||c=='.') *dicp++ = c, c=getch(); *dicp='\0'; if(h=gethome(hold+1)) (void)strcpy(hold,h),dicp=hold+strlen(hold); else (void)strcpy(&linebuf[0],hold), (void)strcpy(hold,prefixbase+prefix), dicp=hold+strlen(prefixbase+prefix), (void)strcpy(dicp,&linebuf[0]), dicp+=strlen(dicp); kollect(okpath); dicp=hold; } else if(c=='/') /* absolute pathname */ kollect(okpath); else { /* relative pathname */ char *hold=dicp; (void)strcpy(dicp,prefixbase+prefix); dicp+=strlen(prefixbase+prefix); kollect(okpath); dicp=hold; } if(c!='\"')return(NULL); c = ' '; return(dicp); } /* result is volatile - call keep(dicp) to retain */ void adjust_prefix(f) /* called at %insert and at loadfile, to get static pathname resolution */ char *f; { /* the directory part of the pathname f becomes the new prefix for pathnames, and we stack the current prefix */ char *g; prefixstack=strcons(prefix,prefixstack); prefix += strlen(prefixbase+prefix)+1; while(prefix+strlen(f)>=prefixlimit) /* check and fix overflow */ prefixlimit += 1024, prefixbase=realloc(prefixbase,prefixlimit); (void)strcpy(prefixbase+prefix,f); g=rindex(prefixbase+prefix,'/'); if(g)g[1]='\0'; else prefixbase[prefix]='\0'; } /* NOTES on how static pathname resolution is achieved: (the specification is that pathnames must always be resolved relative to the file in which they are encountered) Definition -- the 'prefix' of a pathname is the initial segment up to but not including the last occurrence of '/' (null if no '/' present). Keep the wd constant during compilation. Have a global char* prefix, initially null. 1) Whenever you read a relative pathname(), insert 'prefix' on the front of it. 2) On entering a new level of insert, stack old prefix and prefix becomes that of new file name. Done by calling adjust_prefix(). 3) On quitting a level of insert, unstack old prefix. */ int peekdig() { int ch = getc(s_in); ungetc(ch,s_in); return('0'<=ch&&ch<='9'); } int peekch() { word ch = getc(s_in); ungetc(ch,s_in); return(ch); } int openfile(n) /* returns 0 or 1 as indication of success - puts file on fileq if successful */ char *n; { FILE *f; f= fopen(n,"r"); if(f==NULL)return(0); fileq= cons(strcons(f,NIL),fileq); insertdepth++; return(1); } int identifier(s) /* recognises reserved words */ int s; /* flags looking for ul reserved words only */ { extern word lastid,initialising; if(inbnf==1) { /* only reserved nonterminals are `empty', `end', `error', `where' */ if(is("empty ")||is("e_m_p_t_y"))return(EMPTYSY); else if(is("end ")||is("e_n_d"))return(ENDSY); else if(is("error ")||is("e_r_r_o_r"))return(ERRORSY); else if(is("where ")||is("w_h_e_r_e"))return(WHERE); } else switch(dicp[0]) { case 'a': if(is("abstype")||is("a_b_s_t_y_p_e")) return(ABSTYPE); break; case 'd': if(is("div")||is("d_i_v")) return(DIV); break; case 'F': if(is("False")) /* True, False alleged to be predefined, not reserved (??) */ { yylval = False; return(CONST); } break; case 'i': if(is("if")||is("i_f")) return(IF); break; case 'm': if(is("mod")||is("m_o_d")) return(REM); break; case 'o': if(is("otherwise")||is("o_t_h_e_r_w_i_s_e")) return(OTHERWISE); break; case 'r': if(is("readvals")||is("r_e_a_d_v_a_l_s")) return(READVALSY); break; case 's': if(is("show")||is("s_h_o_w")) return(SHOWSYM); break; case 'T': if(is("True")) { yylval = True; return(CONST); } case 't': if(is("type")||is("t_y_p_e")) return(TYPE); break; case 'w': if(is("where")||is("w_h_e_r_e")) return(WHERE); if(is("with")||is("w_i_t_h")) return(WITH); break; } if(s){ syntax("illegal use of underlining\n"); return('_'); } yylval=name(); /* not a reserved word */ if(commandmode&&lastid==0&&id_type(yylval)!=undef_t)lastid=yylval; return(isconstructor(yylval)?CNAME:NAME); } word directive() /* these are of the form "%identifier" */ { extern word SYNERR,magic; word holdcol=col-1,holdlin=line_no; c = getch(); if(c=='%'){ c=getch(); return(ENDIR); } kollect(okulid); switch(dicp[0]=='_'&&dicp[1]==''?dicp[2]:dicp[0]) { case 'b': if(is("begin")||is("_^Hb_^He_^Hg_^Hi_^Hn")) if(inlex) return(LBEGIN); if(is("bnf")||is("_^Hb_^Hn_^Hf")) { setlmargin(); col=holdcol+4; /* `indent' to right hand end of directive */ return(BNF); } break; case 'e': if(is("export")||is("_e_x_p_o_r_t")) { if(magic)syntax( "%export directive not permitted in \"-exp\" script\n"); return(EXPORT); } break; case 'f': if(is("free")||is("_f_r_e_e")) { if(magic)syntax( "%free directive not permitted in \"-exp\" script\n"); return(FREE); } break; case 'i': if(is("include")||is("_i_n_c_l_u_d_e")) { if(!SYNERR){ layout(); setlmargin(); } /* does `indent' for grammar */ if(!pathname()) syntax("bad pathname after %include\n"); else yylval=strcons(addextn(1,dicp), fileinfo(get_fil(current_file),holdlin)), /* (includee,hereinfo) */ keep(dicp); return(INCLUDE); } if(is("insert")||is("_i_n_s_e_r_t")) { char *f=pathname(); if(!f)syntax("bad pathname after %insert\n"); else if(insertdepth<12&&openfile(f)) { adjust_prefix(f); vergstack=cons(lverge,vergstack); echostack=cons(echoing,echostack); litstack=cons(literate,litstack); linostack=strcons(line_no,linostack); line_no=0; atnl=1; /* was line_no=1; */ keep(dicp); current_file = make_fil(f,fm_time(f),0,NIL); files = append1(files,cons(current_file,NIL)); tl[hd[fileq]] = current_file; s_in = (FILE *)hd[hd[fileq]]; literate= peekch()=='>'||litname(f); col=lverge=holdcol; if(echoing) { putchar('\n'); if(!literate) if(litmain)putchar('>'),spaces(holdcol); else spaces(holdcol); } c = getch(); } /* used to precede previous cmd when echo was delayed by one char, see getch() */ else { int toomany=(insertdepth>=12); printf("%s%%insert error - cannot open \"%s\"\n", echoing?"\n":"",f); keep(dicp); if(toomany)printf( "too many nested %%insert directives (limit=%ld)\n", insertdepth); else files = append1(files,cons(make_fil(f,0,0,NIL),NIL)); /* line above for benefit of `oldfiles' */ acterror(); } return(yylex()); } break; case 'l': if(is("lex")||is("_^Hl_^He_^Hx")) { if(inlex)syntax("nested %lex not permitted\n"); /* due to use of global vars inlex, lexdefs */ return(LEX); } if(is("list")||is("_l_i_s_t")) { echoing=verbosity; return(yylex()); } break; case 'n': if(is("nolist")||is("_n_o_l_i_s_t")) { echoing=0; return(yylex()); } break; } if(echoing)putchar('\n'); printf("syntax error: unknown directive \"%%%s\"\n",dicp), acterror(); return(END); } int okid(ch) int ch; { return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9' ||ch=='_'||ch=='\''); } int okulid(ch) int ch; { return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9' ||ch=='_'||ch==''||ch=='\''); } void kollect(f) /* note top of dictionary used as work space to collect current token */ int (*f)(); { dicq= dicp; while((*f)(c)){ *dicq++ = c; c= getch(); } *dicq++ = '\0'; ovflocheck; } char *keep(p) /* call this to retain volatile string for later use */ char *p; { if(p==dicp)dicp= dicq; else (void)strcpy(dicp,p), p=dicp, dicp=dicq=dicp+strlen(dicp)+1, dic_check(); return(p); } void dic_check() /* called from REDUCE */ { ovflocheck; } void numeral() { word nflag=1; dicq= dicp; while(isdigit(c)) *dicq++ = c, c=getch(); if(c=='.'&&peekdig()) { *dicq++ = c, c=getch(); nflag=0; while(isdigit(c)) *dicq++ = c, c=getch(); } if(c=='e') { word np=0; *dicq++ = c, c=getch(); nflag=0; if(c=='+')c=getch(); else /* ignore + before exponent */ if(c=='-')*dicq++ = c, c=getch(); if(!isdigit(c)) /* e must be followed by some digits */ syntax("badly formed floating point number\n"); while(c=='0') *dicq++ = c, c=getch(); while(isdigit(c)) np++, *dicq++ = c, c=getch(); if(!nflag&&np>3) /* scanf falls over with silly exponents */ { syntax("floating point number out of range\n"); return; } } ovflocheck; if(nflag) /* `.' or `e' makes fractional */ *dicq = '\0', yylval= bigscan(dicp); else { double r=0.0; if(dicq-dicp>60) /* this allows 59 chars */ /* scanf crashes, on VAX, gives wrong answers, on ORION 1/05 */ { syntax("illegal floating point constant (too many digits)\n"); return; } *dicq = '\n'; sscanf(dicp,"%lf",&r); yylval= sto_dbl(r); } } void hexnumeral() /* added 21.11.2013 */ { word nflag=1; dicq= dicp; *dicq++ = c, c=getch(); /* 0 */ *dicq++ = c, c=getch(); /* x */ if(!isxdigit(c)&&c!='.')syntax("malformed hex number\n"); while(c=='0'&&isxdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */ while(isxdigit(c)) *dicq++ = c, c=getch(); ovflocheck; if(c=='.'||tolower(c)=='p') /* hex float, added 20.11.19 */ { double d; if(c=='.') { *dicq++ = c, c=getch(); while(isxdigit(c)) *dicq++ = c, c=getch(); } if(c=='p') { *dicq++ = c, c=getch(); if(c=='+'||c=='-')*dicq++ = c, c=getch(); if(!isdigit(c))syntax("malformed hex float\n"); while(isdigit(c)) *dicq++ = c, c=getch(); } ovflocheck; *dicq='\0'; if(dicq-dicp>60||sscanf(dicp,"%lf",&d)!=1) syntax("malformed hex float\n"); else yylval= sto_dbl(d); return; } *dicq = '\0'; yylval= bigxscan(dicp+2,dicq); } void octnumeral() /* added 21.11.2013 */ { word nflag=1; dicq= dicp; if(!isdigit(c))syntax("malformed octal number\n"); while(c=='0'&&isdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */ while(isdigit(c)&&c<='7') *dicq++ = c, c=getch(); if(isdigit(c))syntax("illegal digit in octal number\n"); ovflocheck; *dicq = '\0'; yylval= bigoscan(dicp,dicq); } word namebucket[128]; /* each namebucket has a list terminated by 0, not NIL */ int hash(s) /* returns a value in {0..127} */ char *s; { int h = *s; if(h)while(*++s)h ^= *s; /* guard necessary to deal with s empty */ return(h&127); } int isconstrname(s) char *s; { if(s[0]=='$')s++; return isupper(*s); /* formerly !islower */ } word getfname(x) /* nonterminals have an added ' ', getfname returns the corresponding function name */ word x; { char *p = get_id(x); dicq= dicp; while(*dicq++ = *p++); if(dicq-dicp<3)fprintf(stderr,"impossible event in getfname\n"),exit(1); dicq[-2] = '\0'; /* overwrite last char */ ovflocheck; return(name()); } int isnonterminal(x) word x; { char *n; if(tag[x]!=ID)return(0); n = get_id(x); return(n[strlen(n)-1]==' '); } word name() { word q,h; q= namebucket[h=hash(dicp)]; while(q&&!is(get_id(hd[q])))q= tl[q]; if(q==0) { q = sto_id(dicp); namebucket[h] = cons(q,namebucket[h]); keep(dicp); } else q= hd[q]; return(q); } /* note - keeping buckets sorted didn't seem to help (if anything slightly slower) probably because ordering only relevant if name not present, and outweighed by increased complexity of loop */ int inprelude=1; word make_id(n) /* used in mira_setup(), primdef(), predef(), all in steer.c */ char *n; { word x,h; h=hash(n); x = sto_id(inprelude?keep(n):n); namebucket[h] = cons(x,namebucket[h]); return(x); } word findid(n) /* like name() but returns NIL rather than create new id */ char *n; { word q; q= namebucket[hash(n)]; while(q&&!strcmp(n,get_id(hd[q]))==0)q= tl[q]; return(q?hd[q]:NIL); } word *pnvec=0,nextpn,pn_lim=200; /* private name vector */ void reset_pns() /* (re)initialise private name space */ { nextpn=0; if(!pnvec) { pnvec=(word *)malloc(pn_lim*sizeof(word)); if(pnvec==NULL)mallocfail("pnvec"); } } word make_pn(val) /* create new private name with value val */ word val; { if(nextpn==pn_lim) { pn_lim+=400; pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word)); if(pnvec==NULL)mallocfail("pnvec"); } pnvec[nextpn]=strcons(nextpn,val); return(pnvec[nextpn++]); } word sto_pn(n) /* return n'th private name, extending pnvec if necessary */ word n; { if(n>=pn_lim) { while(pn_lim<=n)pn_lim+=400; pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word)); if(pnvec==NULL)mallocfail("pnvec"); } while(nextpn<=n) /* NB allocates all missing names upto and including nth*/ pnvec[nextpn]=strcons(nextpn,UNDEF),nextpn++; return(pnvec[n]); } void mkprivate(x) /* disguise identifiers prior to removal from environment */ word x; /* used in setting up prelude - see main() in steer.c */ { while(x!=NIL) { char *s = get_id(hd[x]); get_id(hd[x])[0] += 128; /* hack to make private internal name */ x = tl[x]; } /* NB - doesn't change hashbucket */ inprelude=0; } word sl=100; void string() { word p; word ch,badch=0; c = getch(); ch= getlitch(); p= yylval= cons(NIL,NIL); while(ch!=EOF&&rawch!='\"'&&rawch!='\n') if(ch==-7) ch=getlitch(); else /* skip \& */ if(ch<0){ badch=ch; break; } else { p= tl[p]= cons(ch,NIL); ch= getlitch(); } yylval= tl[yylval]; if(badch)errclass(badch,1); if(rawch=='\n') syntax("non-escaped newline encountered inside string quotes\n"); else if(ch==EOF) { if(echoing)putchar('\n'); printf("syntax error: script ends inside unclosed string quotes - \n"); printf(" \""); while(yylval!=NIL&& sl-- ) { putchar(hd[yylval]); yylval= tl[yylval]; } printf("...\"\n"); acterror(); } } int charclass() { word p; word ch,badch=0,anti=0; c = getch(); if(c=='^')anti=1,c=getch(); ch= getlitch(); p= yylval= cons(NIL,NIL); while(ch!=EOF&&rawch!='`'&&rawch!='\n') if(ch==-7)ch=getlitch(); else /* skip \& */ if(ch<0){ badch=ch; break; } else { if(rawch=='-'&&hd[p]!=NIL&&hd[p]!=DOTDOT) ch=DOTDOT; /* non-initial, non-escaped '-' */ p= tl[p]= cons(ch,NIL); ch= getlitch(); } if(hd[p]==DOTDOT)hd[p]='-'; /* naturalise a trailing '-' */ for(p=yylval;tl[p]!=NIL;p=tl[p]) /* move each DOTDOT to front of range */ if(hd[tl[p]]==DOTDOT) { hd[tl[p]]=hd[p],hd[p]=DOTDOT; if(hd[tl[p]]>=hd[tl[tl[p]]]) syntax("illegal use of '-' in [charclass]\n"); } yylval= tl[yylval]; if(badch)errclass(badch,2); if(rawch=='\n') syntax("non-escaped newline encountered in char class\n"); else if(ch==EOF) { if(echoing)putchar('\n'); printf( "syntax error: script ends inside unclosed char class brackets - \n"); printf(" ["); while(yylval!=NIL&& sl-- ) { putchar(hd[yylval]); yylval= tl[yylval]; } printf("...]\n"); acterror(); } return(anti); } void reset_lex() /* called after an error */ { extern word errs,errline; extern char *current_script; /*printf("reset_lex()\n"); /* DEBUG */ if(!commandmode) { if(!errs)errs=fileinfo(get_fil(current_file),line_no); /* convention, if errs set contains location of error, otherwise pick up from current_file and line_no */ if(tl[errs]==0&&(char *)hd[errs]==current_script) /* at end of file, so line_no has been reset to 0 */ printf("error occurs at end of "); else printf("error found near line %ld of ",tl[errs]); printf("%sfile \"%s\"\ncompilation abandoned\n", (char *)hd[errs]==current_script?"":"%insert ", (char *)hd[errs]); if((char *)hd[errs]==current_script) errline=tl[errs]==0?lastline:tl[errs],errs=0; else { while(tl[linostack]!=NIL)linostack=tl[linostack]; errline=hd[linostack]; } /* tells editor where to find error - errline contains location of 1st error in main script, errs is hereinfo of upto one error in %insert script (each is 0 if not set) - some errors can set both */ } reset_state(); } void reset_state() /* reset all global variables used by compiler */ { extern word TABSTRS,SGC,newtyps,algshfns,showchain,inexplist,sreds, rv_script,idsused; /* printf("reset_state()\n"); /* DEBUG */ if(commandmode) while(c!='\n'&&c!=EOF)c=getc(s_in); /* no echo */ while(fileq!=NIL)fclose((FILE *)hd[hd[fileq]]),fileq=tl[fileq]; insertdepth= -1; s_in=stdin; echostack=idsused=prefixstack=litstack=linostack=vergstack =margstack=NIL; prefix=0; prefixbase[0]='\0'; echoing=verbosity&listing; brct=inbnf=sreds=inlex=inexplist=commandmode=lverge=col=lmargin=0; atnl=1; rv_script=0; algshfns=newtyps=showchain=SGC=TABSTRS=NIL; c=' '; line_no=0; litmain=literate=0; /* printf("exit reset_state()\n"); /* DEBUG */ } /* end of MIRANDA LEX ANALYSER */