12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163 |
- (* changes:
- 11-nov-92
- Now in m+ mode "AB""CD" is accespted as AB"CD value
- Earlier it were two values "AB" and "CD"
- *)
- {$DEFINE RAMSCAN}
- {$DEFINE SCANU}
- { Rigal integrated environment (c) 1991 Software house Riga }
- { }
- { }
- {$IFDEF SCANU}
- Unit Scan;
- interface
- uses
- {$IFDEF WIN} tmemstrm,winprocs, {$ENDIF}
- define;
- { Separate scanner for input strings and output Rigal objects }
- Procedure INITIALIZE_SCAN_VARIABLES;
- Procedure Scaner
- {*****************************************}
- (mode_parm:integer;
- {1=from file,2=list of strings,3=absulute address,
- 4=stream memory handle }
- filename:string80;
- options_str:string80;
- var rez:a; { result; set to NULL if input is absent }
- var erlist_parm:a; { error number list; set to NULL if no errors }
- strlist:a; { list of atoms }
- segm,ofs:longint); { parts of absolute address }
- implementation
- uses defpage,nef,poutlexu,doutu;
- {$ELSE}
- #include "define.p"
- #include "defpage.inc"
- #include "nef2.inc"
- #include "outs.inc"
- #include "scan.inc"
- {$ENDIF}
- {VARIABLES}
- type bigstring=
- {$IFDEF xSUN}
- varying array[127] of char;
- {$ELSE}
- string [127];
- {$ENDIF}
- var a1: a; { global variable for ONLY LOCAL use }
- {aa1 : aa ;} { --"-- }
- X : MPD; { --"-- }
- bl801 : bl80; { -- " -- }{ array[1..80] of char}
- k,kk:integer; { -"- }
- c1:char;
- saved_coord, { where current token began }
- coord_mark , { what was set by coordinate marker }
- line_byte_number , { number of totally read bytes till beginning
- of current line }
- last_mark_byte_number , { number of totally read bytes till
- last coordinate marker }
- old_line_length { length of the current line,
- used only for incrementation of "lyne_byte_number"}
- :word;
- DT:descriptortype; { type of last read token or control character }
- AADR:a; { A-space address of last read token }
- Linenumber:longint; { Current line number }
- Tokennumber:longint;{ Current token number }
- Errlist :a; { List of numbers of errors }
- Read_mode:integer; { 1,2,3 }
- PTR1 : PTR; { List pointer when read from list of atoms }
- {$IFDEF RAMSCAN}
- ABSPNT : Pointer;{ RAM pointer when read from RAM }
- {$ENDIF}
- inpfile : text;
- var
- c_lexics,
- pascal_lexics,
- row80_coord,
- byte_coord,
- mark_byte_coord,
- mark_only_coord,
- string_coord,
- char_coord,
- token_coord,
- collect_errors,
- screen_errors,
- to_uppercase,
- pascal_comment,
- c_comment,
- ada_comment,
- pascal_string,
- modula_string:boolean;
- const new_line_code=Chr(13); { Chr(0 is allowed too }
- endfile_code =Chr(26);
- var s:bigstring;
- s_for_val:bigstring; { added 17-FEB-92 }
- i:integer;
- type is_something=
- (is_control, is_letter, is_digit, is_underscore, is_printable,
- is_special, is_space , is_first_of_two);
- type pair=packed array[1..2]of char;
- var as:array[char]of is_something;
- isa : is_something;
- upcase_tab:array[char]of char;
- set_of_second_of_two:set of char;
- two_char_symbols_num:integer;
- two_char_symbols:array[1..30]of pair;
- b123 : record
- case integer of
- 1:(b1:char);
- 2:(b2:packed array[1..2]of char);
- 3:(b3:packed array[1..3]of char);
- end;
- var in_comment,in_string : boolean;
- var is_2quote : boolean; { if in_string then it is possible }
- var Str_constlen:longint;
- Str_const : string80; { array of char is allowed too }
- {===== SUN VERSION ===}
- {$IFDEF SUN}
- procedure inc{1}(var xxx:longint);
- begin xxx:=xxx+1; end;
- procedure inc{2}(var xxx:longint;yyy:longint);
- begin xxx:=xxx+yyy; end;
- procedure inc{3}(var xxx:integer);
- begin xxx:=xxx+1; end;
- procedure inc{4}(var xxx:integer;yyy:integer);
- begin xxx:=xxx+yyy; end;
- {$ENDIF}
- {=====}
- function CONT_CHAR_TO_DT(c:char):descriptortype;forward;
- function Getcoord:word;forward;
- procedure ER_LEX(er_number:integer);forward;
- {Procedure INITIALIZE_SCAN_VARIABLES;forward;}
- Procedure READ_FILE(var read_file_rez:a);forward;
- Function READ_ITEM:a;forward;
- Procedure READLINE;forward;
- Procedure PutAtom(j:integer);forward;
- Procedure PutIt(dd:descriptortype;j:integer);forward;
- Procedure PutIdent(j:integer);forward;
- Procedure PutFloat(j:integer;digits_before_dot,digits_after_dot:integer);forward;
- Procedure PutNumber;forward;
- Procedure PutStr;forward;
- Procedure PutStr2;forward;
- {Procedure Scaner;forward;}
- Procedure SETLEXICS;forward;
- Procedure StrAdd(c:char);forward;
- Procedure StrBegin;forward;
- Procedure Token;forward;
- Function TAKE_DIGITS(var jj:integer):longint;forward;
- Function TAKE_LETTERS:integer;forward;
- { uses global string, received from Readline procedure,
- produces global attributes of new one token read.
- Some variables also should be initialized if by Initial_token
- procedure at the start of the WHOLE programm }
- { This procedure is oriented to Turbo Pascal language }
- { Procedure call graph follows :
- /self\
- USEPAS -> SCAN -> READFILE -> READITEM -> ADDEL.nef
- -> LCONC.nef ER_LEX
- -> SETOPTIONS GETS1.defpage
- -> FIRST.nef TOKEN
- -> ASSIGN.system POINTR.defpage
- -> RESET.system LCONC.nef
- -> READLINE
- TOKEN -> ER
- -> READ_LETTERS_OR_DIGITS
- -> READ_DIGITS
- -> STRBEGIN
- -> PUTNUMBER
- -> StrAdd -> ER
- -> PUTSTR -> PUTATM.defpage
- -> PutAtom ->\
- -> PUTFLOAT -> PUTIT -> PUTATM.defpage
- -> PUTIDENT ->/
- }
- function CONT_CHAR_TO_DT(c:char):descriptortype;
- {**********************************************}
- begin
- Cont_char_to_dt:=descriptortype(c);
- end;
- function Getcoord:word;
- {***********************}
- begin
- if row80_coord then Getcoord:=i+Linenumber*80
- else
- if mark_only_coord then Getcoord:=coord_mark
- else
- if mark_byte_coord then
- Getcoord:=coord_mark+
- (line_byte_number+i-last_mark_byte_number)
- else
- if byte_coord then Getcoord:=line_byte_number+i
- else
- if string_coord then Getcoord:=Linenumber
- else
- if token_coord then Getcoord:=Tokennumber
- else
- Getcoord:=0;
- end;
- procedure ER_LEX(er_number:integer);
- {***********************************}
- var er_atom:a;co:word;erm:string80;
- begin
- co:=Getcoord;
- if collect_errors then begin
- Gets1(er_atom,x.sa);
- with x.snd^ do begin
- dtype:=number;
- cord:=co;
- val:=er_number;
- end;
- lconc(errlist {global} ,er_atom);
- end;
- if screen_errors then
- begin
- case er_number of
- 1 : erm:=' unexpected end of file before end of tree';
- 2 : erm:=' unexpected end of file within tree branch ';
- 3 : erm:=' unexpected end of file within tree ';
- 4 : erm:=' unexpected end of file before end of list ';
- 5 : erm:=' unexpected end of tree ';
- 6 : erm:=' unexpected end of list or another control character';
- 7 : erm:=' unexpected end of file in name ';
- 8 : erm:=' unexpected end of file in named object ';
- 9 : erm:=' unexpected control character ';
- 10 : erm:=' too long atom ( more than 80 bytes ) ';
- 11 : erm:=' end of string constant not found in the current line';
- 12 : erm:=' end of file before end of comment';
- 13 : erm:=' control character within comment ';
- 14 : erm:=' control character within string constant';
- end;
- writeln('Lexical error :',erm);
- writeln('Line=',linenumber,' column=',i);
- end;
- end;
- Procedure INITIALIZE_SCAN_VARIABLES;
- {***********************************}
- var c:char;
- begin
- for c:=Chr(0) to Chr(255) do upcase_tab[c]:=c;
- for c:=Chr(97) to Chr(122) do upcase_tab[c]:=Chr(ord(c)-32);{ASCII}
- (* for c:=Chr(160) to Chr(175) do upcase_tab[c]:=Chr(ord(c)-32); *){Russian}
- (* for c:=Chr(224) to Chr(239) do upcase_tab[c]:=Chr(ord(c)-60);*){Russian}
- { German for Windows }
- upcase_tab[chr(228)]:=chr(196);
- upcase_tab[chr(246)]:=chr(214);
- upcase_tab[chr(252)]:=chr(220);
- for c:=Chr(0) to Chr(31) do as[c]:=is_control;
- for c:=Chr(128) to Chr(255) do as[c]:=is_letter; { Russian and pseudographics }
- for c:=Chr(32) to Chr(127) do as[c]:=is_printable; { not used actually }
- as[new_line_code]:=is_control;
- as[' ']:=is_space;
- as[Chr(9)] :=is_space;
- { these are allowed to be First letter of identifiers }
- for c:='A'to'Z' do as[c]:=is_letter;
- for c:='a'to'z' do as[c]:=is_letter;
- for c:='0'to'9' do as[c]:=is_digit;
- { allowed to be non-first letter of odentifier }
- as['_']:=is_underscore;
- { All the rest settings - see procedure SETLEXICS }
- end;
- Procedure READ_FILE(var read_file_rez:a);
- {******************************}
- { reads whole input, produces list of items }
- label 99;
- begin read_file_rez:=NULL;
- repeat a1:=read_item;
- if (dt=start_tree)or(dt=end_tree)or(dt=start_list)or
- (dt=end_list)or(dt=name_obj) then begin ER_LEX(6);goto 99;end;
- if DT<>eof_desk then lconc(read_file_rez,a1);
- until DT=eof_desk;
- 99: end;
- Function READ_ITEM:a;
- {********************}
- label 99;
- var aadr1:a; result,temp_res:a;
- begin
- READ_ITEM:=NULL; { default value for exits with errors }
- result:=NULL;
- TOKEN;
- case DT of
- atom,idatom,tatom,fatom,keyword:
- begin gets1(result,x.sa);
- with x.sad^ do begin
- dtype:=DT; cord:=saved_coord;
- name := AADR;
- end
- end;
- number:
- begin gets1(result,x.sa);
- with x.snd^ do begin
- dtype:=DT; cord:=saved_coord;
- val:= AADR; { is set in TOKEN .. is_digit }
- end;
- end;
- start_tree:
- begin result:=NULL;
- repeat
- TOKEN;
- aadr1:=AADR; { to save }
- if (DT=idatom)or(DT=atom)or(DT=tatom)or(DT=keyword)
- { What is allowed selector in scaner input ?
- Normally - idatom only, but here
- atom is allowed too - for experiment purposes }
- then
- begin
- a1:=Read_item;
- if DT=end_list then begin ER_LEX(6); goto 99; end;
- if DT=eof_desk then begin ER_LEX(1);goto 99;end;
- Addel3(result,aadr1,a1);
- end
- else if DT<>end_tree then begin ER_LEX(2);goto 99;end;
- ;
- until (DT=end_tree)or(DT=eof_desk);
- if DT=eof_desk then begin ER_LEX(3);goto 99;end;
- DT:=complex_desk; { to ignore analysis in upper level of
- recursion }
- end;
- start_list :
- begin
- result:=NULL;
- repeat
- a1:=read_item;
- if DT=eof_desk then begin ER_LEX(4);goto 99;end;
- if (DT<>end_list) then lconc(result,a1);
- until DT=end_list;
- DT:=complex_desk;
- end;
- end_tree : begin ER_LEX(5); goto 99; end;
- end_list : begin end; {immodiately returns to the upper level}
- name_obj: begin
- temp_res:=read_item;
- if DT=end_list then begin ER_LEX(6); goto 99; end;
- if DT=eof_desk then begin ER_LEX(7); goto 99; end;
- result :=read_item;
- if DT=end_list then begin ER_LEX(6); goto 99; end;
- if DT=eof_desk then begin ER_LEX(8); goto 99; end;
- if result<>NULL then begin
- points(result,x.sa);
- if (x.smld^.dtype=listmain)or(x.smtd^.dtype=treemain)
- then x.smtd^.name:=temp_res;
- end;
- DT:=complex_desk;
- end;
- eof_desk:begin end; { returns to the upper level }
- {$IFDEF xSUN}
- OTHERWISE
- {$ELSE}
- ELSE
- {$ENDIF}
- begin ER_LEX(9);goto 99; end { impossible value }
- end; { case }
- read_item:=result;
- 99: end; { Procedure Read_item }
- Procedure READLINE;
- {***************************}
- { sets new values for "s" and "i" global variables }
- label 99;
- begin
- i:=1; { In any case, so; only here it is initialized.
- it is column number.
- Variable "s" is string only for speed purposes;
- It plays exactly as Packed array of char,
- never used as whole and the Length byte is never used }
- Linenumber:=linenumber+1;
- line_byte_number:=line_byte_number+old_line_length;
- case read_mode of
- 1:
- begin
- if eof(inpfile) then begin s[1]:=endfile_code;close(inpfile);end
- else
- begin readln(inpfile,s);s[length(s)+1]:=new_line_code;
- old_line_length:=length(s);
- end;
- end;
- 2:
- begin
- if PTR1.nel=0 then begin s[1]:=endfile_code;goto 99;end;
- if PTR1.cel=0
- then begin
- s[1]:=new_line_code;
- old_line_length:=0;
- end
- else begin
- Pointr(PTR1.cel,X.sa);
- with x.sad^ do begin
- if (dtype=atom)or (dtype=idatom)or
- (dtype=fatom)or (dtype=tatom)or (dtype=keyword)
- then begin
- pointa(name,bl801,k);
- for kk:=1 to k do s[kk]:=bl801[kk];
- old_line_length:=k;
- s[k+1]:=new_line_code;
- end
- else
- begin
- s[1]:=new_line_code; { other objects are ignored }
- old_line_length:=0;
- end
- end; { with }
- end; { <>0 }
- NEXT(PTR1);
- end; {=2}
- 3:
- begin
- {$IFDEF RAMSCAN}
- k:=0;
- repeat c1:=char(Pointer(ABSPNT)^);
- write(':',ord(c1));
- inc{1}(longint(ABSPNT));
- inc{1}(k);
- s[k]:=c1;
- until (c1=new_line_code)or(k=126);
- if (k=126) then
- begin
- writeln('SCANNER DIRECT ACCESS WARNING !');
- s[127]:=new_line_code;
- end;
- old_line_length:=k;
- {$ENDIF}
- end;
- 4: begin
- {$IFDEF WIN}
- (* reads by-byte from global memory stream *)
- s:='';
- k:=0;
- while true do begin
- byte(c1):=SM^.readbyte;
- if c1=#13 then begin (* eoln symbol reached *)
- old_line_length:=k;
- (* this variable must refer to last symbol before eoln *)
- s[k+1]:=new_line_code;Exit;end;
- if c1=#26 then begin (* end of file reached *)
- old_line_length:=k;
- inc(k);
- s[k]:=endfile_code;
- SM^.Close;Dispose(SM,Done);Exit;end;
- if c1<>#10 then
- begin
- Inc(k);s[k]:=c1;inc(byte(s[0])); (* adds one character to string *)
- end;
- end;
- {$ENDIF}
- end;
- end; (* case *)
- 99:;
- end;
- Procedure PutAtom(j:integer);
- {***********************}
- begin
- PutIt(atom,j);
- end;
- Procedure PutIt(dd:descriptortype;j:integer);
- {***********************}
- begin
- if j>80 then begin DT:=Keyword;ER_LEX(10);j:=80; end else DT:=dd;
- putatm(s[i],j,AADR);
- saved_coord:=Getcoord;
- Tokennumber:=Tokennumber+1;
- end;
- Procedure PutIdent(j:integer);
- {***********************}
- begin
- PutIt(idatom,j);
- end;
- Procedure PutFloat(j:integer;digits_before_dot,digits_after_dot:integer);
- {***********************}
- var REA_VAL:real; ii:integer;
- AR:array[1..sizeof(real)+2]of char;
- begin
- (* Val2(Copy(s,i,j),REA_VAL,ii ignored ); *)
- system.Val(Copy(s,i,j),REA_VAL,ii);
- if ii<>0 then
- begin
- if j>80 then j:=80;
- DT:=keyword; (* WRONG REAL CONSTANT *)
- putatm(s[i],j,aadr);
- end
- else
- begin
- ii:=sizeof(real)+2;
- for j:=1 to ii-2 do
- AR[j]:=REAL_CHAR(REA_VAL)[j];
- AR[ii-1]:=chr(digits_before_dot);
- AR[ii] :=chr(digits_after_dot);
- putatm(AR[1],ii,AADR);
- DT:=fatom;
- end;
- (* writeln(' BEF, AFT =',digits_before_dot,' ',digits_after_dot); *)
- saved_coord:=Getcoord;
- inc{1}(Tokennumber);
- end;
- Procedure PutNumber;
- {***********************}
- begin
- DT:=number;
- saved_coord:=Getcoord;
- inc{1}(Tokennumber);
- end;
- Procedure PutStr;
- {***********************}
- begin
- if str_constlen>80 then
- begin
- str_constlen:=80;
- DT:=keyword; (* TOO LONG STRING CONSTANT *)
- end
- else
- DT:=TATOM;
- putatm(Str_const[1],Str_constlen, AADR);
- in_string:=false;
- Tokennumber:=Tokennumber+1;
- end;
- Procedure PutStr2;
- {***********************}
- begin
- if str_constlen>80 then str_constlen:=80;
- putatm(Str_const[1],Str_constlen, AADR);
- DT:=keyword;
- in_string:=false;
- Tokennumber:=Tokennumber+1;
- end;
- { this procedure used only for Modula2 or C-style string constants like 'x', -
- to show difference between 'x' and "x".
- Access via #_KEYWORD built_in rule is possible.
- When input use 'm+p-' !
- When output such tokens you should write
- IF #_KEYWORD($X) -> OUT <] @ '"' $X '"' ELSIF T-> OUT <] $X FI;
- }
- Procedure Scaner
- {*****************************************}
- (mode_parm:integer;
- {1=from file,2=list of strings,3=absulute address}
- filename:string80;
- options_str:string80;
- var rez:a; { result; set to NULL if input is absent }
- var erlist_parm:a; { error number list; set to NULL if no errors }
- strlist:a; { list of atoms }
- segm,ofs:longint); { parts of absolute address }
- LABEL 1,99;
- {INNER FUNCTION}
- Function Setop(c:char):boolean;
- begin
- Setop:=false;
- {$IFDEF xSUN}
- k:=Index(options_str,c);
- {$ELSE}
- k:=Pos(c,options_str);
- {$ENDIF}
- if k<>0 then
- if options_str[k+1]<>'-' then Setop:=true;
- end;
- begin
- read_mode:=mode_parm ; { save for global use }
- rez:=NULL;
- erlist_parm:=NULL;
- ERRLIST { global }:=NULL;
- { INITIALIZES ALL OPTIONS }
- { DEFAULTS }
- options_str:=options_str+'D-C-P+p+m-U+S+O+s-t-L-A+R+Y-B-N-';
- { to every option su,me default value should be given,
- otherwise the flag remains uninitialized }
- {ERRORS}
- screen_errors :=Setop('S');
- collect_errors:=Setop('O');
- {COMMENTS}
- ada_comment :=Setop('D');
- c_comment :=Setop('C');
- pascal_comment:=Setop('P');
- {STRING CONSTANTS}
- pascal_string :=Setop('p');
- modula_string :=Setop('m');
- to_uppercase :=Setop('U');
- {COORDINATE}
- string_coord :=Setop('s');
- token_coord :=Setop('t');
- row80_coord :=Setop('R');
- byte_coord :=Setop('Y');
- mark_byte_coord:=Setop('B');
- mark_only_coord:=Setop('N');
- {LANGUAGE_SPECIFIC LEXICS}
- c_lexics :=Setop('L');
- pascal_lexics :=Setop('A');
- { INITIALIZES LANGUAGE-SPECIFIC SETTINGS }
- SETLEXICS;
- { INITIALIZES "SESSION" FLAGS (alf order)}
- coord_mark:=0;
- in_comment:=false;
- in_string :=false;
- is_2quote :=false;
- last_mark_byte_number:=0;
- line_byte_number:=0;
- linenumber:=0;
- old_line_length :=0;
- str_constlen:=0;
- tokennumber:=0;
- { INITIALIZES PHYSICAL LEVEL READING }
- case read_mode of
- 1: begin { READ FROM FILE }
- if not existfile(filename)then begin Rez:=0;goto 1;end;
- {$IFDEF xSUN}
- Reset(inpfile,filename);
- {$ELSE}
- Assign(inpfile,filename);
- Reset(inpfile);
- {$ENDIF}
- readline; { reads first line of file }
- end;
- 2:
- begin { READ FROM LIST }
- if strlist=NULL then begin Rez:=0; goto 99; end;
- First(strlist,PTR1); { sets global list-pointer PTR1 }
- if (PTR1.ptrtype<>ptrlist) or
- (PTR1.nel=0) then begin Rez:=0; goto 99; end;
- readline; { reads line from current list-pointer position }
- end;
- 3:
- begin { READ FROM MS-DOS RAM MEMORY }
- {$IFDEF RAMSCAN}
- { Normalizing memory address }
- { Is it necessary for Windows model ? }
- { segm:=segm+ofs div 16;
- ofs:=ofs mod 16; }
- ABSPNT:=system.Ptr(segm,ofs); { This place is DOS-dependent !!! }
- if char (ABSPNT^) <> new_line_code
- then begin Rez:=0; goto 99; end;
- ABSPNT:=Pointer(longint(ABSPNT)+1);
- readline;
- {$ENDIF}
- end;
- 4: begin
- {$IFDEF WIN}
- New(SM,Init);
- SM^.open(segm);
- if (SM^.GETSIZE>128000)or(SM^.GETSIZE<0) THEN
- begin
- MESSAGEBOX(0,'Wrong handle to LDT stream',nil,16);
- rez:=0; goto 99; end
- else if (SM^.GETSIZE=0) then
- begin
- MESSAGEBOX(0,'Empty LDT stream',nil,16);
- rez:=0; goto 99; end
- else
- begin
- readline;
- end;
- {$ENDIF}
- end;
- end; (* case *)
- READ_FILE(REZ); { Main call ... }
- erlist_parm:=ERRLIST {global} ;
- 1:;99:;
- end;{ procedure Scaner }
- Procedure SETLEXICS;
- {***********************}
- var j:integer;
- begin
- if pascal_lexics then begin { changes in standard, necessary for Pascal}
- as['{']:=is_special;
- as['#']:=is_special;
- as['$']:=is_special;
- as['%']:=is_special;
- as['''']:=is_special;
- as['(']:=is_special;
- two_char_symbols[1]:=':=';
- two_char_symbols[2]:='<=';
- two_char_symbols[3]:='>=';
- two_char_symbols[4]:='**';
- two_char_symbols[5]:='..';
- two_char_symbols[6]:='<>';
- two_char_symbols_num:=6;
- set_of_second_of_two:=['=','*','.','>'];
- for j:=1 to two_char_symbols_num do
- as[two_char_symbols[j][1]]:=is_first_of_two;
- if modula_string then
- begin
- as['"']:=is_special;
- as['{']:=is_printable;
- end
- end
- else
- if c_lexics then begin
- as['_']:=is_letter;
- as['$']:=is_letter;
- two_char_symbols[1]:='->';
- two_char_symbols[2]:='++';
- two_char_symbols[3]:='--';
- two_char_symbols[4]:='>>';
- two_char_symbols[5]:='<<';
- two_char_symbols[6]:='==';
- two_char_symbols[7]:='+=';
- two_char_symbols[8]:='*=';
- two_char_symbols[9]:='-=';
- two_char_symbols[10]:='/=';
- two_char_symbols[11]:='%=';
- two_char_symbols[12]:='&=';
- two_char_symbols[13]:='^=';
- two_char_symbols[14]:='|=';
- two_char_symbols[15]:='!=';
- two_char_symbols_num:=15;
- set_of_second_of_two:=['>','+','-','<','='];
- for j:=1 to two_char_symbols_num do
- as[two_char_symbols[j][1]]:=is_first_of_two;
- as['<']:=is_special; { used to process <<= }
- as['>']:=is_special; { used to process >>= }
- as['/']:=is_special; { used to process /* }
- { otherwise isa:=is_first_of_two
- is assigned ! }
- as['''']:=is_special;
- as['"']:=is_special;
- end;
- end;
- Procedure StrAdd(c:char);
- {***********************}
- begin
- inc{1}(Str_constlen);
- if Str_constlen<=80 then Str_const[Str_constlen]:=c;
- end;
- Procedure StrBegin;
- {******************}
- begin
- saved_coord:=GetCoord;
- { It will be used when PutStr works and TOKEN EXITs - in READ_ITEM}
- in_string:=true;
- Str_constlen:=0;
- end;
- Procedure Token;
- {********************}
- label 1,99;
- var J,i_saved:integer; { positions }
- var digits_before_dot,digits_after_dot:integer;
- begin
- { at beginning time
- "i" - is already set to character in string "s";
- array "as" is already initialized;
- coordinate mode "coord_mode" is already known;
- }
- with b123 do begin { variant record ,b1=b2[1]=b3[1]; b2[2]=b3[2] }
- 1: { we return to this label if token is not ready still }
- b1:=s[i];
- isa:=as[b1]; { type of this character }
- if isa=is_control then begin {1}
- if b1=new_line_code
- then
- begin
- Readline; { skips to next line, sets new "s" and "i" }
- if in_string then begin ER_LEX(11); PutStr2; goto 99; end;
- { ERROR= end of line appears in string constant }
- goto 1;
- end
- else
- if b1=endfile_code
- then
- begin
- if in_comment then ER_LEX(12);
- { ERROR = end of file appears in comment }
- DT:=eof_desk;
- goto 99;
- end
- else
- { others are control characters; }
- { they set "DT" field and then form Rigal list/tree structure }
- begin{2}
- if in_comment then begin ER_LEX(13); in_comment:=false; end;
- { ERROR = control char in comment }
- if in_string then begin ER_LEX(14); PutStr2; goto 99;
- { will take control character next time }
- end;
- { ERROR = control char in string }
- DT:=Cont_char_to_dt(b1);
- I:=I+1;
- If (DT=set_coord)and(as[s[i]]=is_digit)
- { this control character sets coordinate to value given in input }
- then begin
- Coord_mark:=TAKE_DIGITS(J);
- Last_mark_byte_number:=line_byte_number+i;
- I:=I+J;
- goto 1; { DOES NOT RETURNS ! }
- end;
- goto 99;
- end; {2}
- {NEVER HERE}
- end; {1}
- { all the following executes AFTER check of is_control }
- {b3[1]:=s[i];} { this character }
- b3[2]:=s[i+1]; { next }
- b3[3]:=s[i+2]; { next }
- if in_comment { CHECK FOR END OF COMMENT }
- then begin {1}
- { we are in comment;
- here only comments that have some special end mark are processed }
- if pascal_comment then begin {2}
- if b1='}' then begin inc{3}(i); in_comment:=false; goto 1;end;
- if b2='*)' then begin inc{4}(i,2); in_comment:=false; goto 1; end;
- end {2}
- else
- if c_comment then begin {2}
- if b2='*/' then begin inc{4}(i,2); in_comment:=false; goto 1;end;
- end {2}
- ;
- inc{3}(i);
- goto 1;
- end;{1}
- if in_string
- { CHECK FOR END OF STRING or SOMETHING SPECIAL IN STRING }
- then begin {1}
- if pascal_string then begin {2}
- if b1='''' then begin {3}
- if b2[2]='''' then begin StrAdd('''');
- inc{4}(i,2);
- goto 1;end
- { this allow to save '''' as '' }
- else begin Putstr;inc{3}(i);goto 99;end;
- end {3}
- else begin {3}
- StrAdd(b1);
- inc{3}(i);
- goto 1;
- end {3}
- end; {2}
- if modula_string then begin {2}
- if is_2quote and (b1='"') then
- begin
- if b2[2]='"' then begin StrAdd('"');Inc{4}(i,2);goto 1;end
- else begin Putstr;inc{3}(i);goto 99;end;
- end;
- if not(is_2quote) and (b1='''') then
- begin Putstr2;inc{3}(i);goto 99;end
- else begin {3}
- if b1='\' then
- begin
- StrAdd(b1);
- StrAdd(b2[2]);
- { this allows to save \? as ? even if \" appears}
- inc{4}(i,2);
- goto 1;
- end;
- StrAdd(b1);
- inc{3}(i);
- goto 1;
- end; {3}
- end {2}
- end ; {1}
- { all the following executes AFTER check for in_string & in_comment }
- if isa=is_special then begin{1}
- if pascal_comment then begin{2}
- if (b2='(*')
- then begin in_comment:=true;inc{4}(i,2);goto 1;end;
- if (b1='{')and(b2[2]<>'$')
- then begin in_comment:=true;inc{3}(i) ;goto 1;end;
- end{2}
- else
- if c_comment then begin{2}
- if b2='/*' then begin in_comment:=true;inc{4}(i,2);goto 1;end;
- end {2}
- else
- if ada_comment then begin {2}
- if b2='--' then begin Readline;goto 1; end;
- end {2}
- ;
- if pascal_string then begin {2}
- if b1='''' then begin StrBegin;is_2quote:=false;inc{3}(i);goto 1;end;
- end {2}
- else
- if modula_string then begin {2}
- if b1='''' then begin StrBegin;is_2quote:=false;inc{3}(i);goto 1;end;
- if b1='"' then begin StrBegin;is_2quote:=true;inc{3}(i);goto 1;end;
- end {2}
- ;
- if pascal_lexics then begin {2}
- { SPECIALLY TAKES TURBO PASCAL DIRECTIVE-COMMENTS }
- if (b2='{$') then begin {3}
- j:=0;
- repeat inc{3}(j);
- until (s[i+j]='}')or(as[s[i+j]]=is_control);
- if s[i+j]='}' then inc{3}(j);
- PutAtom(j);
- inc{4}(i,j);
- goto 99;
- end; {3}
- { ADDITIONAL SYMBOLS }
- if (b1='#')or(b1='$')or(b1='%')
- then begin {3}
- inc{3}(i);
- J:=TAKE_LETTERS; { starts from i-th position }
- i:=i-1;
- PutAtom(J+1); { takes token from i-th position }
- inc{4}(i,j+1);
- goto 99;
- end; {3}
- end{2}
- else
- if c_lexics then begin {2} { SYMBOLS= > < }
- { SPECIAL CASES FOR C LANGUAGE }
- if (b3='<<=')or(b3='>>=') then begin
- PutAtom(3); inc{4}(i,3); goto 99;
- end;
- isa:=is_first_of_two;
- { !! in C case <<, >>, >=, <= will be tested further
- in TWO_char_symbols section, hence we go to there
- using assignment to "ISA" }
- end {2}
- end; {1}
- if isa=is_space then begin inc{3}(i); goto 1;end;
- if isa=is_letter then begin
- J:=TAKE_LETTERS;
- PutIdent(J);
- I:=I+J;
- goto 99;
- end;
- if isa=is_digit then begin {1}
- if c_lexics then begin {2}
- { ADDITIONAL SYMBOLS; HERE hex and octal numbers are saved as
- normal ATOMs }
- if (b1='0')
- then begin {3}
- inc{3}(i);
- J:=TAKE_LETTERS; { starts from i-th position }
- i:=i-1;
- PutAtom(J+1); { takes token from i-th position }
- inc{4}(i,j+1);
- goto 99;
- end; {3}
- end; {2}
- i_saved:=i; { remember starting position }
- AADR:=TAKE_DIGITS(J);
- digits_before_dot:=j;
- if (j<10)and
- ( ((s[i+j]='.')and(s[i+j+1]='.'))
- or
- (not(s[i+j]in['e','E','.'])) )
- then begin PutNumber;inc{4}(i,j);goto 99;end;
- inc{4}(i,j);
- if s[i]='.' then begin
- inc{3}(i);
- AADR:=TAKE_DIGITS(J);
- digits_after_dot:=j;
- inc{4}(i,j);
- end;
- if s[i] in ['e','E'] then
- begin
- digits_before_dot:=0;
- digits_after_dot:=0;
- inc{3}(i);
- if s[i] in ['+','-'] then inc{3}(i);
- AADR:=TAKE_DIGITS(J);
- inc{4}(i,j);
- end;
- J:=i-i_saved;
- i:=i_saved; { to set "i" to starting position }
- PutFloat(J,digits_before_dot,digits_after_dot);
- inc{4}(i,j);
- goto 99;
- end;{1}
- if isa=is_first_of_two then begin {1}
- if b2[2] in set_of_second_of_two then
- for j:=1 to two_char_symbols_num do
- if b2=two_char_symbols[j] then begin
- PutAtom(2);I:=I+2;goto 99;
- end
- end;{1}
- PutAtom(1);inc{3}(i);
- end; { with}
- 99:;
- end; { Procedure }
- Function TAKE_LETTERS:integer;
- {**************************************}
- label 99;
- { Reads only letters, digits and underscores.
- returns number of characters read }
- var JJ:integer; c:char;
- begin
- JJ:=0;
- while true do begin
- c:=s[i+jj];
- isa:=as[c];
- if (isa=is_letter)or(isa=is_digit)or(isa=is_underscore)
- then begin
- if to_uppercase then s[i+jj]:=upcase_tab[c];
- inc{3}(jj)
- end
- else begin
- TAKE_LETTERS:=jj;goto 99;
- end;
- end;
- 99:;
- end;
- Function TAKE_DIGITS(var jj:integer):longint;
- {************************************************}
- label 99;
- var summator:longint; c:char;
- begin
- JJ:=0; summator:=0;
- while true do begin
- c:=s[i+jj];
- if as[c]=is_digit then
- begin summator:=summator*10+ord(c)-ord('0');inc{3}(jj);end
- else begin TAKE_DIGITS:=summator;goto 99;end;
- end;
- 99:;
- end;
- (*begin*)
- { PRESS F8 when debugging ! }
- (*INITIALIZE_SCAN_VARIABLES;*)
- end.
|