diff options
Diffstat (limited to 'lex.c')
-rw-r--r-- | lex.c | 1220 |
1 files changed, 1220 insertions, 0 deletions
@@ -0,0 +1,1220 @@ +/* MIRANDA LEX ANALYSER */ + +/************************************************************************** + * Copyright (C) Research Software Limited 1985-90. All rights reserved. * + * The Miranda system is distributed as free software under the terms in * + * the file "COPYING" which is included in the distribution. * + * * + * Revised to C11 standard and made 64bit compatible, January 2020 * + *------------------------------------------------------------------------*/ + +#include "data.h" +#include "lex.h" +#include "big.h" +#include <errno.h> + +static int charclass(void); +static void chblank(char *); +static int collectstars(void); +static word directive(void); +static void hexnumeral(void); +static int identifier(int); +static void kollect(int(*f)()); +static void numeral(void); +static void octnumeral(void); +static int peekch(void); +static int peekdig(void); +static void string(void); + +extern word DICSPACE; /* see steer.c for default value */ +/* capacity in chars of dictionary space for storing identifiers and file names + to get a larger name space just increase this number */ +extern FILE *s_in; +extern word echoing,listing,verbosity,magic,inbnf,inlex; +word fileq=NIL; /* list of currently open-for-input files, of form + cons(strcons(stream,<ptr to element of 'files'>),...)*/ +word insertdepth= -1,margstack=NIL,col=0,lmargin=0; +word echostack=NIL; +word lverge=0,vergstack=NIL; +char *prefixbase; /* stores prefixes for pathnames, to get static resolution */ +word prefixlimit=1024; /* initial size of space for prefixes */ +word prefix,prefixstack=NIL; /* current prefix, stack of old prefixes */ +word atnl=1,line_no=0; +word lastline; +word litstack=NIL,linostack=NIL; +word c=' ', lastc; +word commandmode; +word common_stdin,common_stdinb,cook_stdin; +word litmain=0,literate=0; /* flags "literate" comment convention */ +char *dic,*dicp,*dicq; +char *pathname(); + +void setupdic() +{ dicp=dicq=dic=malloc(DICSPACE); + if(dic==NULL)mallocfail("dictionary"); + /* it is not permissible to realloc dic, because at the moment identifiers + etc. contain absolute pointers into the dictionary space - so we must + choose fairly large initial value for DICSPACE. Fix this later */ + prefixbase=malloc(prefixlimit); + prefixbase[0]='\0'; + prefix=0; +} + +/* this allows ~login convention in filenames */ +/* #define okgetpwnam +/* suppress 26.5.06 getpwnam causes runtime error when statically linked (Linux) */ + +#ifdef okgetpwnam +#include <pwd.h> +struct passwd *getpwnam(); +#endif +char *getenv(); + +char *gethome(n) /* for expanding leading `~' in tokens and pathnames */ +char *n; +{ struct passwd *pw; + if(n[0]=='\0')return(getenv("HOME")); +#ifdef okgetpwnam + if(pw=getpwnam(n))return(pw->pw_dir); +#endif + return(NULL); +} + +#define ovflocheck if(dicq-dic>DICSPACE)dicovflo() + +void dicovflo() /* is this called everywhere it should be? Check later */ +{ fprintf(stderr,"\npanic: dictionary overflow\n"); exit(1); } + +char *token() /* lex analyser for command language (very simple) */ +{ extern char *current_script; + word ch=getchar(); + dicq = dicp; /* uses top of dictionary as temporary work space */ + while(ch==' '||ch=='\t')ch=getchar(); + if(ch=='~') + { char *h; + *dicq++ = ch; + ch=getchar(); + while(isalnum(ch)||ch=='-'||ch=='_'||ch=='.') + *dicq++ = ch,ch=getchar(); + /* NB csh does not allow `.' in user ids when expanding `~' + but this may be a mistake */ + *dicq='\0'; + if(h=gethome(dicp+1)) + (void)strcpy(dicp,h),dicq=dicp+strlen(dicp); + } +#ifdef SPACEINFILENAMES + if(ch!='"'&&ch!='<') /* test added 9.5.06 see else part */ +#endif + while(!isspace(ch)&&ch!=EOF) + { *dicq++ = ch; + if(ch=='%') + if(dicq[-2]=='\\')(--dicq)[-1]='%'; + else dicq--,(void)strcpy(dicq,current_script),dicq+=strlen(dicq); + ch=getchar(); } +#ifdef SPACEINFILENAMES + else { word closeq= ch=='<'?'>':'"'; /* this branch added 9.5.06 */ + *dicq++ = ch; /* to allow spaces in "tok" or <tok> */ + ch=getchar(); + while(ch!=closeq&&ch!='\n'&&ch!=EOF) + *dicq++ = ch, ch=getchar(); + if(ch==closeq)*dicq++ = ch, ch=getchar(); } +#endif + *dicq++ = '\0'; + ovflocheck; + while(ch==' '||ch=='\t')ch=getchar(); + ungetc(ch,stdin); + return(*dicp=='\0'?(char *)NULL:dicp); +} /* NB - if no token returns NULL rather than pointer to empty string */ + +char *addextn(b,s) /* if(b)force s to end in ".m", and resolve <quotes> */ +word b; +char *s; +{ extern char *miralib; + extern char linebuf[]; + word n=strlen(s); + /* printf("addextn(%s)\n",s); /* DEBUG */ + if(s[0]=='<'&&s[n-1]=='>') + { static int miralen=0; /* code to handle quotes added 21/1/87 */ + if(!miralen)miralen=strlen(miralib); + strcpy(linebuf,miralib); + linebuf[miralen]= '/'; + strcpy(linebuf+miralen+1,s+1); + strcpy(dicp,linebuf); + s=dicp; + n=n+miralen-1; + dicq=dicp+n+1; + dicq[-1] = '\0'; /* overwrites '>' */ + ovflocheck; } else + if(s[0]=='\"'&&s[n-1]=='\"') + { /*strip quotes */ + dicq=dicp; s++; + while(*s)*dicq++ = *s++; + dicq[-1]='\0'; /* overwrites '"' */ + s=dicp; n=n-2; + } + if(!b||strcmp(s+n-2,".m")==0)return(s); + if(s==dicp)dicq--;/*if s in scratch area at top of dic, extend in situ*/ + else { /* otherwise build new copy at top of dic */ + dicq=dicp; + while(*s)*dicq++ = *s++; + *dicq = '\0'; } + if(strcmp(dicq-2,".x")==0)dicq -= 2; else + if(dicq[-1]=='.')dicq -= 1; + (void)strcpy(dicq,".m"); + dicq += 3; + ovflocheck; + /* printf("return(%s)\n",dicp); /* DEBUG */ + return(dicp); +} /* NB - call keep(dicp) if the result is to be retained */ + +word brct=0; + +void spaces(n) +word n; +{ while(n-- >0)putchar(' '); +} + +int litname(s) +char *s; +{ word n=strlen(s); + return(n>=6 && strcmp(s+n-6,".lit.m")==0); +} + +int getch() /* keeps track of current position in the variable "col"(column) */ +{ word ch= getc(s_in); + if(ch==EOF&&!atnl&&tl[fileq]==NIL) /* badly terminated top level file */ + { atnl=1; return('\n'); } + if(atnl) + { if((line_no==0&&!commandmode||magic&&line_no==1)&&litstack==NIL) + litmain=literate= (ch=='>')||litname(get_fil(current_file)); + if(literate) + { word i=0; + while(ch!=EOF&&ch!='>') + { ungetc(ch,s_in); + line_no++; + (void)fgets(dicp,250,s_in); + if(i==0&&line_no>1)chblank(dicp); i++; + if(echoing)spaces(lverge),fputs(dicp,stdout); + ch=getc(s_in); } + if((i>1||line_no==1&&i==1)&&ch!=EOF)chblank(dicp); + if(ch=='>') + { if(echoing)putchar(ch),spaces(lverge);ch=getc(s_in); } + } /* supports alternative `literate' comment convention */ + atnl=0; col= lverge+literate; + if(!commandmode&&ch!=EOF)line_no++; } + if(echoing&&ch!=EOF) + { putchar(ch); + if(ch=='\n'&&!literate) + if(litmain)putchar('>'),spaces(lverge); + else spaces(lverge); + } + if(ch=='\t')col= ((col-lverge)/8 + 1)*8+lverge; + else col++; + if(ch=='\n')atnl= 1; + return(ch); } + +int blankerr=0; + +void chblank(s) +char *s; +{ while(*s==' '||*s=='\t')s++; + if(*s=='\n')return; + syntax("formal text not delimited by blank line\n"); + blankerr=1; + reset(); /* easiest way to recover is to pretend it was an interrupt */ +} + +/* getlitch gets a character from input like getch, but using C escaping + conventions if the char is backslash -- for use in reading character + and string constants */ + +int rawch; +/* it is often important to know, when certain characters are returned (e.g. + quotes and newlines) whether they were escaped or literal */ + +int errch; /* for reporting unrecognised \escape */ + +int getlitch() +{ extern int UTF8; + int ch=c; + rawch = ch; + if(ch=='\n')return(ch); /* always an error */ + if(UTF8&&ch>127) + { /* UTF-8 uses 2 or 3 bytes for unicode points to 0xffff */ + word ch1=c=getch(); + if((ch&0xe0)==0xc0) /* 2 bytes */ + { if((ch1&0xc0)!=0x80) + return -5; /* not valid UTF8 */ + c=getch(); + return sto_char((ch&0x1f)<<6|ch1&0x3f); } + word ch2=c=getch(); + if((ch&0xf0)==0xe0) /* 3 bytes */ + { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80) + return -5; /* not valid UTF8 */ + c=getch(); + return sto_char((ch&0xf)<<12|(ch1&0x3f)<<6|ch2&0x3f); } + word ch3=c=getch(); + if((ch&0xf8)==0xf0) /* 4 bytes, beyond basic multiligual plane */ + { if((ch1&0xc0)!=0x80||(ch2&0xc0)!=0x80||(ch3&0xc0)!=0x80) + return -5; /* not valid UTF8 */ + c=getch(); + return((ch&7)<<18|(ch1&0x3f)<<12|(ch2&0x3f)<<6|ch3&0x3f); } + return(-5); + /* not UTF8 */ + } + if(ch!='\\') + { c=getch(); return(ch); } + ch = getch(); + c = getch(); + switch(ch) + { case '\n': return(getlitch()); /* escaped nl was handled in 'getch()' */ + case 'a': return('\a'); + case 'b': return('\b'); + case 'f': return('\f'); /* form feed */ + case 'n': return('\n'); /* newline, == linefeed */ + case 'r': return('\r'); /* carriage return */ + case 't': return('\t'); + case 'v': return('\v'); + case 'X': /* omit for Haskell escape rules, see also lines marked H */ + case 'x': if(isxdigit(c)) + { int value, N=ch=='x'?4:6; /* N=7 for Haskell escape rules */ + char hold[8]; + ch = c; + int count=0; + /* while(ch=='0'&&isxdigit(peekch()))ch=getch(); /* H-lose leading 0s */ + while(isxdigit(ch)&&count<N) + hold[count++]=ch,ch=getch(); + /* read upto N hex digits */ + hold[count] = '\0'; + sscanf(hold,"%x",&value); + c = ch; + return value>UMAX?-3 /* \x out of range */ + :sto_char(value); } + else return -2; /* \x with no hex digits */ + default: if('0'<=ch&&ch<='9') + { word n=ch-'0',count=1,N=3; /* N=8 for Haskell escape rules */ + ch = c; + /* while(ch=='0'&&isdigit(peekch()))ch=getch(); /* H-lose leading 0s */ + while(isdigit(ch)&&count<N) + /* read upto N digits */ + { n = 10*n+ch-'0'; + count++; + ch = getch(); } + c = ch; + return /* n>UMAX?-4: /* H \decimal out of range */ + sto_char(n); } + if(ch=='\''||ch=='"'||ch=='\\'||ch=='`')return(ch); /* see note */ + if(ch=='&')return -7; /* Haskell null escape, accept silently */ + errch=ch<=255?ch:'?'; + return -6; /* unrecognised \something */ + } +} /* note: we accept \` for ` because getlitch() is used by charclass() */ + +char *rdline() /* used by the "!" command -- see steer.c */ +{ extern char *current_script; + static char linebuf[BUFSIZE]; + char *p=linebuf; + word ch=getchar(),expansion=0; + while(ch==' '||ch=='\t')ch=getchar(); + if(ch=='\n'||ch=='!'&&!(*linebuf)) + { /* "!!" or "!" on its own means repeat last !command */ + if(*linebuf)printf("!%s",linebuf); + while(ch!='\n'&&ch!=EOF)ch=getchar(); + return(linebuf); } + if(ch=='!') + expansion=1,p=linebuf+strlen(linebuf)-1; /* p now points at old '\n' */ + else ungetc(ch,stdin); + while((*p++ =ch=getchar())!='\n'&&ch!=EOF) + if(p-linebuf>=BUFSIZE) + { *p='\0'; + fprintf(stderr,"sorry, !command too long (limit=%d chars): %s...\n", + BUFSIZE,linebuf); + while((ch=getchar())!='\n'&&ch!=EOF); + return(NULL); + } else + if(p[-1]=='%') + if(p>linebuf+1&&p[-2]=='\\')(--p)[-1]='%'; else + { (void)strncpy(p-1,current_script,linebuf+BUFSIZE-p); + p = linebuf+strlen(linebuf); + expansion = 1; + } + *p = '\0'; + if(expansion)printf("!%s",linebuf); + return(linebuf); } + +void setlmargin() /* this and the next routine are used to enforce the offside + rule ("yylex" refuses to read a symbol if col<lmargin) */ +{ margstack= cons(lmargin,margstack); + if(lmargin<col)lmargin= col; } /* inner scope region cannot "protrude" */ + +void unsetlmargin() +{ if(margstack==NIL)return; /* in case called after `syntax("..")' */ + lmargin= hd[margstack]; + margstack= tl[margstack]; } + +int okulid(int); +int PREL=1; + +void errclass(word val, word string) +/* diagnose error in charclass, string or char const */ +{ char *s = string==2?"char class":string?"string":"char const"; + if(val==-2)printf("\\x with no xdigits in %s\n",s); else + if(val==-3)printf("\\hexadecimal escape out of range in %s\n",s); else + if(val==-4)printf("\\decimal escape out of range in %s\n",s); else + if(val==-5)printf("unrecognised character in %s" + "(UTF8 error)\n",s); else + if(val==-6)printf("unrecognised escape \\%c in %s\n",errch,s); else + if(val==-7)printf("illegal use of \\& in char const\n"); else + printf("unknown error in %s\n",s); + acterror(); } + +word yylex() /* called by YACC to get the next symbol */ +{ extern word SYNERR,exportfiles,inexplist,sreds; + /* SYNERR flags context sensitive syntax error detected in actions */ + if(SYNERR)return(END); /* tell YACC to go home */ + layout(); + if(c=='\n') /* can only occur in command mode */ +/* if(magic){ commandmode=0; /* expression just read, now script */ +/* line_no=2; +/* return(c); } else /* no longer relevant 26.11.2019 */ + return(END); + if(col<lmargin) + if(c=='='&&(margstack==NIL||col>=hd[margstack]))/* && part fixes utah.bug*/ + { c = getch(); + return(ELSEQ); /* ELSEQ means "OFFSIDE =" */ + } + else return(OFFSIDE); + if(c==';') /* fixes utah2.bug */ + { c=getch(); layout(); + if(c=='='&&(margstack==NIL||col>=hd[margstack])) + { c = getch(); + return(ELSEQ); /* ELSEQ means "OFFSIDE =" */ + } + else return(';'); + } + if( + /* c=='_'&&okid(peekch()) || /* _id/_ID as lowercase id */ + isalpha(c)){ kollect(okid); + if(inlex==1){ layout(); + yylval=name(); + return(c=='='?LEXDEF: + isconstructor(yylval)?CNAME: + NAME); } + if(inbnf==1) + /* add trailing space to nonterminal to avoid clash + with ordinary names */ + dicq[-1] = ' ', + *dicq++ = '\0'; + return(identifier(0)); } + if('0'<=c&&c<='9'||c=='.'&&peekdig()) + { if(c=='0'&&tolower(peekch())=='x') + hexnumeral(); else /* added 21.11.2013 */ + if(c=='0'&&tolower(peekch())=='o') + getch(),c=getch(),octnumeral(); /* added 21.11.2013 */ + else numeral(); + return(CONST); } + if(c=='%'&&!commandmode)return(directive()); + if(c=='\'') + { c = getch(); + yylval= getlitch(); + if(yylval<0){ errclass(yylval,0); return CONST; } + if(!is_char(yylval)) + printf("%simpossible event while reading char const ('\\%lu\')\n", + echoing?"\n":"",yylval), + acterror(); + if(rawch=='\n'||c!='\'')syntax("improperly terminated char const\n"); + else c= getch(); + return(CONST); } + if(inexplist&&(c=='\"'||c=='<')) + { if(!pathname())syntax("badly formed pathname in %export list\n"); + else exportfiles=strcons(addextn(1,dicp),exportfiles), + keep(dicp); + return(PATHNAME); } + if(inlex==1&&c=='`') + { return(charclass()?ANTICHARCLASS:CHARCLASS); } + if(c=='\"') + { string(); + if(yylval==NIL)yylval=NILS; /* to help typechecker! */ + return(CONST); } + if(inbnf==2) /* fiddle to offside rule in grammars */ + if(c=='[')brct++; else if(c==']')brct--; else + if(c=='|'&&brct==0) + return(OFFSIDE); + if(c==EOF) + { if(tl[fileq]==NIL&&margstack!=NIL)return(OFFSIDE); /* to fix dtbug */ + fclose((FILE *)hd[hd[fileq]]); + fileq= tl[fileq]; insertdepth--; + if(fileq!=NIL&&hd[echostack]) + { if(literate)putchar('>'),spaces(lverge); + printf("<end of insert>"); } + s_in= fileq==NIL?stdin:(FILE *)hd[hd[fileq]]; + c= ' '; + if(fileq==NIL) + { lverge=c=col=lmargin=0; + /* c=0; necessary because YACC sometimes reads 1 token past END */ + atnl=1; + echoing=verbosity&listing; + lastline=line_no; + /* hack so errline can be set right if err at end of file */ + line_no=0; + litmain=literate=0; + return(END); } + else { current_file = tl[hd[fileq]]; + prefix=hd[prefixstack]; + prefixstack=tl[prefixstack]; + echoing=hd[echostack]; + echostack=tl[echostack]; + lverge=hd[vergstack]; + vergstack=tl[vergstack]; + literate=hd[litstack]; + litstack=tl[litstack]; + line_no=hd[linostack]; + linostack=tl[linostack]; } + return(yylex()); } + lastc= c; + c= getch(); +#define try(x,y) if(c==x){ c=getch(); return(y); } + switch(lastc) { + case '_': if(c=='') /* underlined something */ + { c=getch(); + if(c=='<'){ c=getch(); return(LE); } + if(c=='>'){ c=getch(); return(GE); } + if(c=='%'&&!commandmode)return(directive()); + if(isalpha(c)) /* underlined reserved word */ + { kollect(okulid); + if(dicp[1]=='_'&&dicp[2]=='') + return(identifier(1)); } + syntax("illegal use of underlining\n"); + return('_'); } + return(lastc); + case '-': try('>',ARROW) try('-',MINUSMINUS) return(lastc); + case '<': try('-',LEFTARROW) try('=',LE) return(lastc); + case '=': if(c=='>'){ syntax("unexpected symbol =>\n"); return '='; } + try('=',EQEQ) return(lastc); + case '+': try('+',PLUSPLUS) return(lastc); + case '.': if(c=='.') + { c=getch(); + return(DOTDOT); + } + return(lastc); + case '\\': try('/',VEL) return(lastc); + case '>': try('=',GE) return(lastc); + case '~': try('=',NE) return(lastc); + case '&': if(c=='>') + { c=getch(); + if(c=='>')yylval=1; + else yylval=0,ungetc(c,s_in); + c=' '; + return(TO); } + return(lastc); + case '/': try('/',DIAG) return(lastc); + case '*': try('*',collectstars()) return(lastc); + case ':': if(c==':') + { c=getch(); + if(c=='='){ c=getch(); return(COLON2EQ); } + else return(COLONCOLON); + } + return(lastc); + case '$': if( + /* c=='_'&&okid(peekch())|| /* _id/_ID as id */ + isalpha(c)) + { int t; + kollect(okid); + t=identifier(0); + return(t==NAME?INFIXNAME:t==CNAME?INFIXCNAME:'$'); } + /* the last alternative is an error - caveat */ + if('1'<=c&&c<='9') + { int n=0; + while(isdigit(c)&&n<1e6)n=10*n+c-'0',c=getch(); + if(n>sreds) + /* sreds==0 everywhere except in semantic redn clause */ + printf("%ssyntax error: illegal symbol $%d%s\n", + echoing?"\n":"",n,n>=1e6?"...":""), + acterror(); + else { yylval=mkgvar(n); return(NAME); } + } + if(c=='-') + { if(!compiling) + syntax("unexpected symbol $-\n"); else + {c=getch(); yylval=common_stdin; return(CONST); }} + /* NB we disallow recursive use of $($/+/-) inside $+ data + whence addition of `compiling' to premises */ + if(c==':') + { c=getch(); + if(c!='-')syntax("unexpected symbol $:\n"); else + { if(!compiling) + syntax("unexpected symbol $:-\n"); else + {c=getch(); yylval=common_stdinb; return(CONST); }}} /* $:- */ + if(c=='+') + { /* if(!(commandmode&&compiling||magic)) + syntax("unexpected symbol $+\n"); else /* disallow in scripts */ + if(!compiling) + syntax("unexpected symbol $+\n"); else + { c=getch(); + if(commandmode) + yylval=cook_stdin; + else yylval=ap(readvals(0,0),OFFSIDE); + return(CONST); }} + if(c=='$') + { if(!(inlex==2||commandmode&&compiling)) + syntax("unexpected symbol $$\n"); else + { c=getch(); + if(inlex) { yylval=mklexvar(0); return(NAME); } + else return(DOLLAR2); }} + if(c=='#') + { if(inlex!=2)syntax("unexpected symbol $#\n"); else + { c=getch(); yylval=mklexvar(1); return(NAME); }} + if(c=='*') + { c=getch(); yylval=ap(GETARGS,0); return(CONST); } + if(c=='0') + syntax("illegal symbol $0\n"); + default: return(lastc); +}} + +void layout() +{L:while(c==' '||c=='\n'&&!commandmode||c=='\t') c= getch(); + if(c==EOF&&commandmode){ c='\n'; return; } + if(c=='|'&&peekch()=='|' /* ||comments */ + || col==1&&line_no==1 /* added 19.11.2013 */ + &&c=='#'&&peekch()=='!') /* UNIX magic string */ + { while((c=getch())!='\n'&&c!=EOF); + if(c==EOF&&!commandmode)return; + c= '\n'; + goto L; } +} + +int collectstars() +{ int n=2; + while(c=='*')c=getch(),n++; + yylval= mktvar(n); + return(TYPEVAR); +} + +word gvars=NIL; /* list of grammar variables - no need to reset */ + +word mkgvar(i) /* make bound variable (corresponding to $i in bnf rule) */ +word i; +{ word *p= &gvars; + while(--i) + { if(*p==NIL)*p=cons(sto_id("gvar"),NIL); + p= &tl[*p]; } + if(*p==NIL)*p=cons(sto_id("gvar"),NIL); + return(hd[*p]); +} /* all these variables have the same name, and are not in hashbucket */ + +word lexvar=0; + +word mklexvar(i) /* similar - corresponds to $$, $# on rhs of %lex rule */ +word i; /* i=0 or 1 */ +{ extern word ltchar; + if(!lexvar) + lexvar=cons(sto_id("lexvar"),sto_id("lexvar")), + id_type(hd[lexvar])=ltchar, + id_type(tl[lexvar])=genlstat_t(); + return(i?tl[lexvar]:hd[lexvar]); +} + +int ARGC; +char **ARGV; /* initialised in main(), see steer.c */ + +word conv_args() /* used to give access to command line args + see case GETARGS in reduce.c */ +{ word i=ARGC,x=NIL; + if(i==0)return(NIL); /* possible only if not invoked from a magic script */ + { while(--i)x=cons(str_conv(ARGV[i]),x); + x=cons(str_conv(ARGV[0]),x); } + return(x); +} + +word str_conv(s) /* convert C string to Miranda form */ +char *s; +{ word x=NIL,i=strlen(s); + while(i--)x=cons(s[i],x); + return(x); +} /* opposite of getstring() - see reduce.c */ + +int okpath(ch) +int ch; +{ return(ch!='\"'&&ch!='\n'&&ch!='>'); } + +char *pathname() /* returns NULL if not valid pathname (in string quotes) */ +{ layout(); + if(c=='<') /* alternative quotes <..> for system libraries */ + { extern char *miralib; + char *hold=dicp; + c=getch(); + (void)strcpy(dicp,miralib); + dicp+=strlen(miralib); + *dicp++ = '/'; + kollect(okpath); + dicp=hold; + if(c!='>')return(NULL); + c=' '; + return(dicp); } + if(c!='\"')return(NULL); + c=getch(); + if(c=='~') + { char *h,*hold=dicp; + extern char linebuf[]; + *dicp++ = c; + c=getch(); + while(isalnum(c)||c=='-'||c=='_'||c=='.') + *dicp++ = c, c=getch(); + *dicp='\0'; + if(h=gethome(hold+1)) + (void)strcpy(hold,h),dicp=hold+strlen(hold); + else (void)strcpy(&linebuf[0],hold), + (void)strcpy(hold,prefixbase+prefix), + dicp=hold+strlen(prefixbase+prefix), + (void)strcpy(dicp,&linebuf[0]), + dicp+=strlen(dicp); + kollect(okpath); + dicp=hold; + } else + if(c=='/') /* absolute pathname */ + kollect(okpath); + else { /* relative pathname */ + char *hold=dicp; + (void)strcpy(dicp,prefixbase+prefix); + dicp+=strlen(prefixbase+prefix); + kollect(okpath); + dicp=hold; } + if(c!='\"')return(NULL); + c = ' '; + return(dicp); +} /* result is volatile - call keep(dicp) to retain */ + +void adjust_prefix(f) /* called at %insert and at loadfile, to get static pathname + resolution */ +char *f; +{ /* the directory part of the pathname f becomes the new + prefix for pathnames, and we stack the current prefix */ + char *g; + prefixstack=strcons(prefix,prefixstack); + prefix += strlen(prefixbase+prefix)+1; + while(prefix+strlen(f)>=prefixlimit) /* check and fix overflow */ + prefixlimit += 1024, prefixbase=realloc(prefixbase,prefixlimit); + (void)strcpy(prefixbase+prefix,f); + g=rindex(prefixbase+prefix,'/'); + if(g)g[1]='\0'; + else prefixbase[prefix]='\0'; +} + +/* NOTES on how static pathname resolution is achieved: +(the specification is that pathnames must always be resolved relative to the +file in which they are encountered) +Definition -- the 'prefix' of a pathname is the initial segment up to but not +including the last occurrence of '/' (null if no '/' present). +Keep the wd constant during compilation. Have a global char* prefix, initially +null. +1) Whenever you read a relative pathname(), insert 'prefix' on the front of it. +2) On entering a new level of insert, stack old prefix and prefix becomes that + of new file name. Done by calling adjust_prefix(). +3) On quitting a level of insert, unstack old prefix. +*/ + +int peekdig() +{ int ch = getc(s_in); + ungetc(ch,s_in); + return('0'<=ch&&ch<='9'); +} + +int peekch() +{ word ch = getc(s_in); + ungetc(ch,s_in); + return(ch); +} + +int openfile(n) /* returns 0 or 1 as indication of success - puts file on fileq + if successful */ +char *n; +{ FILE *f; + f= fopen(n,"r"); + if(f==NULL)return(0); + fileq= cons(strcons(f,NIL),fileq); + insertdepth++; + return(1); +} + +int identifier(s) /* recognises reserved words */ +int s; /* flags looking for ul reserved words only */ +{ extern word lastid,initialising; + if(inbnf==1) + { /* only reserved nonterminals are `empty', `end', `error', `where' */ + if(is("empty ")||is("e_m_p_t_y"))return(EMPTYSY); else + if(is("end ")||is("e_n_d"))return(ENDSY); else + if(is("error ")||is("e_r_r_o_r"))return(ERRORSY); else + if(is("where ")||is("w_h_e_r_e"))return(WHERE); } + else + switch(dicp[0]) + { case 'a': if(is("abstype")||is("a_b_s_t_y_p_e")) + return(ABSTYPE); + break; + case 'd': if(is("div")||is("d_i_v")) + return(DIV); + break; + case 'F': if(is("False")) /* True, False alleged to be predefined, not + reserved (??) */ + { yylval = False; + return(CONST); } + break; + case 'i': if(is("if")||is("i_f")) + return(IF); + break; + case 'm': if(is("mod")||is("m_o_d")) + return(REM); + break; + case 'o': if(is("otherwise")||is("o_t_h_e_r_w_i_s_e")) + return(OTHERWISE); + break; + case 'r': if(is("readvals")||is("r_e_a_d_v_a_l_s")) + return(READVALSY); + break; + case 's': if(is("show")||is("s_h_o_w")) + return(SHOWSYM); + break; + case 'T': if(is("True")) + { yylval = True; + return(CONST); } + case 't': if(is("type")||is("t_y_p_e")) + return(TYPE); + break; + case 'w': if(is("where")||is("w_h_e_r_e")) + return(WHERE); + if(is("with")||is("w_i_t_h")) + return(WITH); + break; + } + if(s){ syntax("illegal use of underlining\n"); return('_'); } + yylval=name(); /* not a reserved word */ + if(commandmode&&lastid==0&&id_type(yylval)!=undef_t)lastid=yylval; + return(isconstructor(yylval)?CNAME:NAME); +} + +word directive() /* these are of the form "%identifier" */ +{ extern word SYNERR,magic; + word holdcol=col-1,holdlin=line_no; + c = getch(); + if(c=='%'){ c=getch(); return(ENDIR); } + kollect(okulid); + switch(dicp[0]=='_'&&dicp[1]==''?dicp[2]:dicp[0]) + { case 'b': if(is("begin")||is("_^Hb_^He_^Hg_^Hi_^Hn")) + if(inlex) + return(LBEGIN); + if(is("bnf")||is("_^Hb_^Hn_^Hf")) + { setlmargin(); col=holdcol+4; + /* `indent' to right hand end of directive */ + return(BNF); } + break; + case 'e': if(is("export")||is("_e_x_p_o_r_t")) + { if(magic)syntax( + "%export directive not permitted in \"-exp\" script\n"); + return(EXPORT); } + break; + case 'f': if(is("free")||is("_f_r_e_e")) + { if(magic)syntax( + "%free directive not permitted in \"-exp\" script\n"); + return(FREE); } + break; + case 'i': if(is("include")||is("_i_n_c_l_u_d_e")) + { if(!SYNERR){ layout(); setlmargin(); } + /* does `indent' for grammar */ + if(!pathname()) + syntax("bad pathname after %include\n"); + else yylval=strcons(addextn(1,dicp), + fileinfo(get_fil(current_file),holdlin)), + /* (includee,hereinfo) */ + keep(dicp); + return(INCLUDE); } + if(is("insert")||is("_i_n_s_e_r_t")) + { char *f=pathname(); + if(!f)syntax("bad pathname after %insert\n"); else + if(insertdepth<12&&openfile(f)) + { adjust_prefix(f); + vergstack=cons(lverge,vergstack); + echostack=cons(echoing,echostack); + litstack=cons(literate,litstack); + linostack=strcons(line_no,linostack); + line_no=0; atnl=1; /* was line_no=1; */ + keep(dicp); + current_file = make_fil(f,fm_time(f),0,NIL); + files = append1(files,cons(current_file,NIL)); + tl[hd[fileq]] = current_file; + s_in = (FILE *)hd[hd[fileq]]; + literate= peekch()=='>'||litname(f); + col=lverge=holdcol; + if(echoing) + { putchar('\n'); + if(!literate) + if(litmain)putchar('>'),spaces(holdcol); + else spaces(holdcol); } + c = getch(); } /* used to precede previous cmd when echo + was delayed by one char, see getch() */ + else { int toomany=(insertdepth>=12); + printf("%s%%insert error - cannot open \"%s\"\n", + echoing?"\n":"",f); + keep(dicp); + if(toomany)printf( + "too many nested %%insert directives (limit=%ld)\n", + insertdepth); + else + files = append1(files,cons(make_fil(f,0,0,NIL),NIL)); + /* line above for benefit of `oldfiles' */ + acterror(); } + return(yylex()); } + break; + case 'l': if(is("lex")||is("_^Hl_^He_^Hx")) + { if(inlex)syntax("nested %lex not permitted\n"); + /* due to use of global vars inlex, lexdefs */ + return(LEX); } + if(is("list")||is("_l_i_s_t")) + { echoing=verbosity; return(yylex()); } + break; + case 'n': if(is("nolist")||is("_n_o_l_i_s_t")) + { echoing=0; return(yylex()); } + break; + } + if(echoing)putchar('\n'); + printf("syntax error: unknown directive \"%%%s\"\n",dicp), + acterror(); + return(END); +} + +int okid(ch) +int ch; +{ return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9' + ||ch=='_'||ch=='\''); } + +int okulid(ch) +int ch; +{ return('a'<=ch&&ch<='z'||'A'<=ch&&ch<='Z'||'0'<=ch&&ch<='9' + ||ch=='_'||ch==''||ch=='\''); } + +void kollect(f) +/* note top of dictionary used as work space to collect current token */ +int (*f)(); +{ dicq= dicp; + while((*f)(c)){ *dicq++ = c; c= getch(); } + *dicq++ = '\0'; + ovflocheck; +} + +char *keep(p) /* call this to retain volatile string for later use */ +char *p; +{ if(p==dicp)dicp= dicq; + else (void)strcpy(dicp,p), + p=dicp, + dicp=dicq=dicp+strlen(dicp)+1, + dic_check(); + return(p); +} + +void dic_check() /* called from REDUCE */ +{ ovflocheck; } + +void numeral() +{ word nflag=1; + dicq= dicp; + while(isdigit(c)) + *dicq++ = c, c=getch(); + if(c=='.'&&peekdig()) + { *dicq++ = c, c=getch(); nflag=0; + while(isdigit(c)) + *dicq++ = c, c=getch(); } + if(c=='e') + { word np=0; + *dicq++ = c, c=getch(); nflag=0; + if(c=='+')c=getch(); else /* ignore + before exponent */ + if(c=='-')*dicq++ = c, c=getch(); + if(!isdigit(c)) /* e must be followed by some digits */ + syntax("badly formed floating point number\n"); + while(c=='0') + *dicq++ = c, c=getch(); + while(isdigit(c)) + np++, *dicq++ = c, c=getch(); + if(!nflag&&np>3) /* scanf falls over with silly exponents */ + { syntax("floating point number out of range\n"); + return; } + } + ovflocheck; + if(nflag) /* `.' or `e' makes fractional */ + *dicq = '\0', + yylval= bigscan(dicp); else + { double r=0.0; + if(dicq-dicp>60) /* this allows 59 chars */ + /* scanf crashes, on VAX, gives wrong answers, on ORION 1/05 */ + { syntax("illegal floating point constant (too many digits)\n"); + return; } + *dicq = '\n'; + sscanf(dicp,"%lf",&r); + yylval= sto_dbl(r); } +} + +void hexnumeral() /* added 21.11.2013 */ +{ word nflag=1; + dicq= dicp; + *dicq++ = c, c=getch(); /* 0 */ + *dicq++ = c, c=getch(); /* x */ + if(!isxdigit(c)&&c!='.')syntax("malformed hex number\n"); + while(c=='0'&&isxdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */ + while(isxdigit(c)) + *dicq++ = c, c=getch(); + ovflocheck; + if(c=='.'||tolower(c)=='p') /* hex float, added 20.11.19 */ + { double d; + if(c=='.') + { *dicq++ = c, c=getch(); + while(isxdigit(c)) + *dicq++ = c, c=getch(); } + if(c=='p') + { *dicq++ = c, c=getch(); + if(c=='+'||c=='-')*dicq++ = c, c=getch(); + if(!isdigit(c))syntax("malformed hex float\n"); + while(isdigit(c)) + *dicq++ = c, c=getch(); } + ovflocheck; + *dicq='\0'; + if(dicq-dicp>60||sscanf(dicp,"%lf",&d)!=1) + syntax("malformed hex float\n"); + else yylval= sto_dbl(d); + return; } + *dicq = '\0'; + yylval= bigxscan(dicp+2,dicq); +} + +void octnumeral() /* added 21.11.2013 */ +{ word nflag=1; + dicq= dicp; + if(!isdigit(c))syntax("malformed octal number\n"); + while(c=='0'&&isdigit(peekch()))c=getch(); /* skip zeros before first nonzero digit */ + while(isdigit(c)&&c<='7') + *dicq++ = c, c=getch(); + if(isdigit(c))syntax("illegal digit in octal number\n"); + ovflocheck; + *dicq = '\0'; + yylval= bigoscan(dicp,dicq); +} + +word namebucket[128]; /* each namebucket has a list terminated by 0, not NIL */ + +int hash(s) /* returns a value in {0..127} */ +char *s; +{ int h = *s; + if(h)while(*++s)h ^= *s; /* guard necessary to deal with s empty */ + return(h&127); +} + +int isconstrname(s) +char *s; +{ if(s[0]=='$')s++; + return isupper(*s); /* formerly !islower */ +} + +word getfname(x) +/* nonterminals have an added ' ', getfname returns the corresponding + function name */ +word x; +{ char *p = get_id(x); + dicq= dicp; + while(*dicq++ = *p++); + if(dicq-dicp<3)fprintf(stderr,"impossible event in getfname\n"),exit(1); + dicq[-2] = '\0'; /* overwrite last char */ + ovflocheck; + return(name()); +} + +int isnonterminal(x) +word x; +{ char *n; + if(tag[x]!=ID)return(0); + n = get_id(x); + return(n[strlen(n)-1]==' '); +} + +word name() +{ word q,h; + q= namebucket[h=hash(dicp)]; + while(q&&!is(get_id(hd[q])))q= tl[q]; + if(q==0) + { q = sto_id(dicp); + namebucket[h] = cons(q,namebucket[h]); + keep(dicp); } + else q= hd[q]; + return(q); } +/* note - keeping buckets sorted didn't seem to help (if anything slightly + slower) probably because ordering only relevant if name not present, and + outweighed by increased complexity of loop */ + +int inprelude=1; + +word make_id(n) /* used in mira_setup(), primdef(), predef(), all in steer.c */ +char *n; +{ word x,h; + h=hash(n); + x = sto_id(inprelude?keep(n):n); + namebucket[h] = cons(x,namebucket[h]); + return(x); } + +word findid(n) /* like name() but returns NIL rather than create new id */ +char *n; +{ word q; + q= namebucket[hash(n)]; + while(q&&!strcmp(n,get_id(hd[q]))==0)q= tl[q]; + return(q?hd[q]:NIL); } + +word *pnvec=0,nextpn,pn_lim=200; /* private name vector */ + +void reset_pns() /* (re)initialise private name space */ +{ nextpn=0; + if(!pnvec) + { pnvec=(word *)malloc(pn_lim*sizeof(word)); + if(pnvec==NULL)mallocfail("pnvec"); } +} + +word make_pn(val) /* create new private name with value val */ +word val; +{ if(nextpn==pn_lim) + { pn_lim+=400; + pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word)); + if(pnvec==NULL)mallocfail("pnvec"); } + pnvec[nextpn]=strcons(nextpn,val); + return(pnvec[nextpn++]); +} + +word sto_pn(n) /* return n'th private name, extending pnvec if necessary */ +word n; +{ if(n>=pn_lim) + { while(pn_lim<=n)pn_lim+=400; + pnvec=(word *)realloc(pnvec,pn_lim*sizeof(word)); + if(pnvec==NULL)mallocfail("pnvec"); } + while(nextpn<=n) /* NB allocates all missing names upto and including nth*/ + pnvec[nextpn]=strcons(nextpn,UNDEF),nextpn++; + return(pnvec[n]); +} + +void mkprivate(x) /* disguise identifiers prior to removal from environment */ +word x; /* used in setting up prelude - see main() in steer.c */ +{ while(x!=NIL) + { char *s = get_id(hd[x]); + get_id(hd[x])[0] += 128; /* hack to make private internal name */ + x = tl[x]; } /* NB - doesn't change hashbucket */ + inprelude=0; +} + +word sl=100; + +void string() +{ word p; + word ch,badch=0; + c = getch(); + ch= getlitch(); + p= yylval= cons(NIL,NIL); + while(ch!=EOF&&rawch!='\"'&&rawch!='\n') + if(ch==-7) ch=getlitch(); else /* skip \& */ + if(ch<0){ badch=ch; break; } + else { p= tl[p]= cons(ch,NIL); + ch= getlitch(); } + yylval= tl[yylval]; + if(badch)errclass(badch,1); + if(rawch=='\n') + syntax("non-escaped newline encountered inside string quotes\n"); else + if(ch==EOF) + { if(echoing)putchar('\n'); + printf("syntax error: script ends inside unclosed string quotes - \n"); + printf(" \""); + while(yylval!=NIL&& sl-- ) + { putchar(hd[yylval]); + yylval= tl[yylval]; } + printf("...\"\n"); + acterror(); } +} + +int charclass() +{ word p; + word ch,badch=0,anti=0; + c = getch(); + if(c=='^')anti=1,c=getch(); + ch= getlitch(); + p= yylval= cons(NIL,NIL); + while(ch!=EOF&&rawch!='`'&&rawch!='\n') + if(ch==-7)ch=getlitch(); else /* skip \& */ + if(ch<0){ badch=ch; break; } + else { if(rawch=='-'&&hd[p]!=NIL&&hd[p]!=DOTDOT) + ch=DOTDOT; /* non-initial, non-escaped '-' */ + p= tl[p]= cons(ch,NIL); + ch= getlitch(); } + if(hd[p]==DOTDOT)hd[p]='-'; /* naturalise a trailing '-' */ + for(p=yylval;tl[p]!=NIL;p=tl[p]) /* move each DOTDOT to front of range */ + if(hd[tl[p]]==DOTDOT) + { hd[tl[p]]=hd[p],hd[p]=DOTDOT; + if(hd[tl[p]]>=hd[tl[tl[p]]]) + syntax("illegal use of '-' in [charclass]\n"); + } + yylval= tl[yylval]; + if(badch)errclass(badch,2); + if(rawch=='\n') + syntax("non-escaped newline encountered in char class\n"); else + if(ch==EOF) + { if(echoing)putchar('\n'); + printf( + "syntax error: script ends inside unclosed char class brackets - \n"); + printf(" ["); + while(yylval!=NIL&& sl-- ) + { putchar(hd[yylval]); + yylval= tl[yylval]; } + printf("...]\n"); + acterror(); } + return(anti); +} + +void reset_lex() /* called after an error */ +{ extern word errs,errline; + extern char *current_script; + /*printf("reset_lex()\n"); /* DEBUG */ + if(!commandmode) + { if(!errs)errs=fileinfo(get_fil(current_file),line_no); + /* convention, if errs set contains location of error, otherwise pick up + from current_file and line_no */ + if(tl[errs]==0&&(char *)hd[errs]==current_script) + /* at end of file, so line_no has been reset to 0 */ + printf("error occurs at end of "); + else printf("error found near line %ld of ",tl[errs]); + printf("%sfile \"%s\"\ncompilation abandoned\n", + (char *)hd[errs]==current_script?"":"%insert ", + (char *)hd[errs]); + if((char *)hd[errs]==current_script) + errline=tl[errs]==0?lastline:tl[errs],errs=0; + else { while(tl[linostack]!=NIL)linostack=tl[linostack]; + errline=hd[linostack]; } + /* tells editor where to find error - errline contains location of 1st + error in main script, errs is hereinfo of upto one error in %insert + script (each is 0 if not set) - some errors can set both */ + } + reset_state(); +} + +void reset_state() /* reset all global variables used by compiler */ +{ extern word TABSTRS,SGC,newtyps,algshfns,showchain,inexplist,sreds, + rv_script,idsused; + /* printf("reset_state()\n"); /* DEBUG */ + if(commandmode) + while(c!='\n'&&c!=EOF)c=getc(s_in); /* no echo */ + while(fileq!=NIL)fclose((FILE *)hd[hd[fileq]]),fileq=tl[fileq]; + insertdepth= -1; + s_in=stdin; + echostack=idsused=prefixstack=litstack=linostack=vergstack + =margstack=NIL; + prefix=0; prefixbase[0]='\0'; + echoing=verbosity&listing; + brct=inbnf=sreds=inlex=inexplist=commandmode=lverge=col=lmargin=0; + atnl=1; + rv_script=0; + algshfns=newtyps=showchain=SGC=TABSTRS=NIL; + c=' '; + line_no=0; + litmain=literate=0; + /* printf("exit reset_state()\n"); /* DEBUG */ +} + +/* end of MIRANDA LEX ANALYSER */ + |