USCAN.PAS 31 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229
  1. (* changes:
  2. 11-nov-92
  3. Now in m+ mode "AB""CD" is accespted as AB"CD value
  4. Earlier it were two values "AB" and "CD"
  5. *)
  6. {$DEFINE MIF}
  7. {$DEFINE RAMSCAN}
  8. {$DEFINE SCANU}
  9. { Rigal integrated environment (c) 1991 Software house Riga }
  10. { }
  11. { }
  12. {$IFDEF SCANU}
  13. Unit Scan;
  14. interface
  15. uses
  16. {$IFDEF WIN} tmemstrm,winprocs, {$ENDIF}
  17. define;
  18. { Separate scanner for input strings and output Rigal objects }
  19. Procedure INITIALIZE_SCAN_VARIABLES;
  20. Procedure Scaner
  21. {*****************************************}
  22. (mode_parm:integer;
  23. {1=from file,2=list of strings,3=absulute address,
  24. 4=stream memory handle }
  25. filename:string80;
  26. options_str:string80;
  27. var rez:a; { result; set to NULL if input is absent }
  28. var erlist_parm:a; { error number list; set to NULL if no errors }
  29. strlist:a; { list of atoms }
  30. segm,ofs:longint); { parts of absolute address }
  31. implementation
  32. uses defpage,nef,poutlexu,doutu;
  33. {$ELSE}
  34. #include "define.p"
  35. #include "defpage.inc"
  36. #include "nef2.inc"
  37. #include "outs.inc"
  38. #include "scan.inc"
  39. {$ENDIF}
  40. {VARIABLES}
  41. type bigstring=
  42. {$IFDEF xSUN}
  43. varying array[127] of char;
  44. {$ELSE}
  45. string [127];
  46. {$ENDIF}
  47. var a1: a; { global variable for ONLY LOCAL use }
  48. {aa1 : aa ;} { --"-- }
  49. X : MPD; { --"-- }
  50. bl801 : bl80; { -- " -- }{ array[1..80] of char}
  51. k,kk:integer; { -"- }
  52. c1:char;
  53. saved_coord, { where current token began }
  54. coord_mark , { what was set by coordinate marker }
  55. line_byte_number , { number of totally read bytes till beginning
  56. of current line }
  57. last_mark_byte_number , { number of totally read bytes till
  58. last coordinate marker }
  59. old_line_length { length of the current line,
  60. used only for incrementation of "lyne_byte_number"}
  61. :word;
  62. DT:descriptortype; { type of last read token or control character }
  63. AADR:a; { A-space address of last read token }
  64. Linenumber:longint; { Current line number }
  65. Tokennumber:longint;{ Current token number }
  66. Errlist :a; { List of numbers of errors }
  67. Read_mode:integer; { 1,2,3 }
  68. PTR1 : PTR; { List pointer when read from list of atoms }
  69. {$IFDEF RAMSCAN}
  70. ABSPNT : Pointer;{ RAM pointer when read from RAM }
  71. {$ENDIF}
  72. inpfile : text;
  73. var
  74. c_lexics,
  75. pascal_lexics,
  76. row80_coord,
  77. byte_coord,
  78. mark_byte_coord,
  79. mark_only_coord,
  80. string_coord,
  81. char_coord,
  82. token_coord,
  83. collect_errors,
  84. screen_errors,
  85. to_uppercase,
  86. pascal_comment,
  87. c_comment,
  88. ada_comment,
  89. pascal_string,
  90. modula_string:boolean;
  91. const new_line_code=Chr(13); { Chr(0 is allowed too }
  92. endfile_code =Chr(26);
  93. var s:bigstring;
  94. s_for_val:bigstring; { added 17-FEB-92 }
  95. i:integer;
  96. type is_something=
  97. (is_control, is_letter, is_digit, is_underscore, is_printable,
  98. is_special, is_space , is_first_of_two);
  99. type pair=packed array[1..2]of char;
  100. var as:array[char]of is_something;
  101. isa : is_something;
  102. upcase_tab:array[char]of char;
  103. set_of_second_of_two:set of char;
  104. two_char_symbols_num:integer;
  105. two_char_symbols:array[1..30]of pair;
  106. b123 : record
  107. case integer of
  108. 1:(b1:char);
  109. 2:(b2:packed array[1..2]of char);
  110. 3:(b3:packed array[1..3]of char);
  111. end;
  112. var in_comment,in_string : boolean;
  113. var is_2quote : boolean; { if in_string then it is possible }
  114. var Str_constlen:longint;
  115. Str_const : string80; { array of char is allowed too }
  116. {===== SUN VERSION ===}
  117. {$IFDEF SUN}
  118. procedure inc{1}(var xxx:longint);
  119. begin xxx:=xxx+1; end;
  120. procedure inc{2}(var xxx:longint;yyy:longint);
  121. begin xxx:=xxx+yyy; end;
  122. procedure inc{3}(var xxx:integer);
  123. begin xxx:=xxx+1; end;
  124. procedure inc{4}(var xxx:integer;yyy:integer);
  125. begin xxx:=xxx+yyy; end;
  126. {$ENDIF}
  127. {=====}
  128. function CONT_CHAR_TO_DT(c:char):descriptortype;forward;
  129. function Getcoord:word;forward;
  130. procedure ER_LEX(er_number:integer);forward;
  131. {Procedure INITIALIZE_SCAN_VARIABLES;forward;}
  132. Procedure READ_FILE(var read_file_rez:a);forward;
  133. Function READ_ITEM(var pghead:a):a;forward; {NEW!}
  134. Procedure READLINE;forward;
  135. Procedure PutAtom(j:integer);forward;
  136. Procedure PutIt(dd:descriptortype;j:integer);forward;
  137. Procedure PutIdent(j:integer);forward;
  138. Procedure PutFloat(j:integer;digits_before_dot,digits_after_dot:integer);forward;
  139. Procedure PutNumber;forward;
  140. Procedure PutStr;forward;
  141. Procedure PutStr2;forward;
  142. {Procedure Scaner;forward;}
  143. Procedure SETLEXICS;forward;
  144. Procedure StrAdd(c:char);forward;
  145. Procedure StrBegin;forward;
  146. Procedure Token;forward;
  147. Function TAKE_DIGITS(var jj:integer):longint;forward;
  148. Function TAKE_LETTERS:integer;forward;
  149. { uses global string, received from Readline procedure,
  150. produces global attributes of new one token read.
  151. Some variables also should be initialized if by Initial_token
  152. procedure at the start of the WHOLE programm }
  153. { This procedure is oriented to Turbo Pascal language }
  154. { Procedure call graph follows :
  155. /self\
  156. USEPAS -> SCAN -> READFILE -> READITEM -> ADDEL.nef
  157. -> LCONC.nef ER_LEX
  158. -> SETOPTIONS GETS1.defpage
  159. -> FIRST.nef TOKEN
  160. -> ASSIGN.system POINTR.defpage
  161. -> RESET.system LCONC.nef
  162. -> READLINE
  163. TOKEN -> ER
  164. -> READ_LETTERS_OR_DIGITS
  165. -> READ_DIGITS
  166. -> STRBEGIN
  167. -> PUTNUMBER
  168. -> StrAdd -> ER
  169. -> PUTSTR -> PUTATM.defpage
  170. -> PutAtom ->\
  171. -> PUTFLOAT -> PUTIT -> PUTATM.defpage
  172. -> PUTIDENT ->/
  173. }
  174. function CONT_CHAR_TO_DT(c:char):descriptortype;
  175. {**********************************************}
  176. begin
  177. Cont_char_to_dt:=descriptortype(c);
  178. {$IFDEF MIF}
  179. if c='<' then cont_char_to_dt:=start_list;
  180. if c='>' then cont_char_to_dt:=end_list;
  181. {$ENDIF}
  182. end;
  183. function Getcoord:word;
  184. {***********************}
  185. begin
  186. if row80_coord then Getcoord:=i+Linenumber*80
  187. else
  188. if mark_only_coord then Getcoord:=coord_mark
  189. else
  190. if mark_byte_coord then
  191. Getcoord:=coord_mark+
  192. (line_byte_number+i-last_mark_byte_number)
  193. else
  194. if byte_coord then Getcoord:=line_byte_number+i
  195. else
  196. if string_coord then Getcoord:=Linenumber
  197. else
  198. if token_coord then Getcoord:=Tokennumber
  199. else
  200. Getcoord:=0;
  201. end;
  202. procedure ER_LEX(er_number:integer);
  203. {***********************************}
  204. var er_atom:a;co:word;erm:string80;
  205. begin
  206. co:=Getcoord;
  207. if collect_errors then begin
  208. Gets1(er_atom,x.sa);
  209. with x.snd^ do begin
  210. dtype:=number;
  211. cord:=co;
  212. val:=er_number;
  213. end;
  214. lconc(errlist {global} ,er_atom);
  215. end;
  216. if screen_errors then
  217. begin
  218. case er_number of
  219. 1 : erm:=' unexpected end of file before end of tree';
  220. 2 : erm:=' unexpected end of file within tree branch ';
  221. 3 : erm:=' unexpected end of file within tree ';
  222. 4 : erm:=' unexpected end of file before end of list ';
  223. 5 : erm:=' unexpected end of tree ';
  224. 6 : erm:=' unexpected end of list or another control character';
  225. 7 : erm:=' unexpected end of file in name ';
  226. 8 : erm:=' unexpected end of file in named object ';
  227. 9 : erm:=' unexpected control character ';
  228. 10 : erm:=' too long atom ( more than 80 bytes ) ';
  229. 11 : erm:=' end of string constant not found in the current line';
  230. 12 : erm:=' end of file before end of comment';
  231. 13 : erm:=' control character within comment ';
  232. 14 : erm:=' control character within string constant';
  233. end;
  234. writeln('Lexical error :',erm);
  235. writeln('Line=',linenumber,' column=',i);
  236. end;
  237. end;
  238. Procedure INITIALIZE_SCAN_VARIABLES;
  239. {***********************************}
  240. var c:char;
  241. begin
  242. for c:=Chr(0) to Chr(255) do upcase_tab[c]:=c;
  243. for c:=Chr(97) to Chr(122) do upcase_tab[c]:=Chr(ord(c)-32);{ASCII}
  244. (* for c:=Chr(160) to Chr(175) do upcase_tab[c]:=Chr(ord(c)-32); *){Russian}
  245. (* for c:=Chr(224) to Chr(239) do upcase_tab[c]:=Chr(ord(c)-60);*){Russian}
  246. { German for Windows }
  247. upcase_tab[chr(228)]:=chr(196);
  248. upcase_tab[chr(246)]:=chr(214);
  249. upcase_tab[chr(252)]:=chr(220);
  250. for c:=Chr(0) to Chr(31) do as[c]:=is_control;
  251. for c:=Chr(128) to Chr(255) do as[c]:=is_letter; { Russian and pseudographics }
  252. for c:=Chr(32) to Chr(127) do as[c]:=is_printable; { not used actually }
  253. as[new_line_code]:=is_control;
  254. as[' ']:=is_space;
  255. as[Chr(9)] :=is_space;
  256. { these are allowed to be First letter of identifiers }
  257. for c:='A'to'Z' do as[c]:=is_letter;
  258. for c:='a'to'z' do as[c]:=is_letter;
  259. for c:='0'to'9' do as[c]:=is_digit;
  260. { allowed to be non-first letter of odentifier }
  261. as['_']:=is_underscore;
  262. { All the rest settings - see procedure SETLEXICS }
  263. end;
  264. Procedure READ_FILE(var read_file_rez:a);
  265. {******************************}
  266. { reads whole input, produces list of items }
  267. label 99;
  268. var dum:a;
  269. begin read_file_rez:=NULL;
  270. repeat a1:=read_item(dum);
  271. if (dt=start_tree)or(dt=end_tree)or(dt=start_list)or
  272. (dt=end_list)or(dt=name_obj) then begin ER_LEX(6);goto 99;end;
  273. if DT<>eof_desk then lconc(read_file_rez,a1);
  274. until DT=eof_desk;
  275. 99: end;
  276. Function READ_ITEM(var pghead:a):a;
  277. {********************}
  278. label 99;
  279. var aadr1:a; result,temp_res,dum,head:a;
  280. begin
  281. READ_ITEM:=NULL; { default value for exits with errors }
  282. pghead:=NULL;
  283. result:=NULL;
  284. TOKEN;
  285. case DT of
  286. atom,idatom,tatom,fatom,keyword:
  287. begin gets1(result,x.sa);
  288. with x.sad^ do begin
  289. dtype:=DT; cord:=saved_coord;
  290. name := AADR;
  291. end
  292. end;
  293. number:
  294. begin gets1(result,x.sa);
  295. with x.snd^ do begin
  296. dtype:=DT; cord:=saved_coord;
  297. val:= AADR; { is set in TOKEN .. is_digit }
  298. end;
  299. end;
  300. start_tree:
  301. begin result:=NULL;
  302. repeat
  303. TOKEN;
  304. aadr1:=AADR; { to save }
  305. if (DT=idatom)or(DT=atom)or(DT=tatom)or(DT=keyword)
  306. { What is allowed selector in scaner input ?
  307. Normally - idatom only, but here
  308. atom is allowed too - for experiment purposes }
  309. then
  310. begin
  311. a1:=Read_item(dum);
  312. if DT=end_list then begin ER_LEX(6); goto 99; end;
  313. if DT=eof_desk then begin ER_LEX(1);goto 99;end;
  314. Addel3(result,aadr1,a1);
  315. end
  316. else if DT<>end_tree then begin ER_LEX(2);goto 99;end;
  317. ;
  318. until (DT=end_tree)or(DT=eof_desk);
  319. if DT=eof_desk then begin ER_LEX(3);goto 99;end;
  320. DT:=complex_desk; { to ignore analysis in upper level of
  321. recursion }
  322. end;
  323. start_list :
  324. begin
  325. result:=NULL;
  326. {$IFDEF MIF}
  327. pghead:=READ_ITEM(dum);
  328. repeat
  329. a1:=read_item(head);
  330. if DT=eof_desk then begin ER_LEX(4);goto 99;end;
  331. if (DT<>end_list) then
  332. begin
  333. if head<>NULL then lconc(result,head);
  334. lconc(result,a1);
  335. end;
  336. until DT=end_list;
  337. {$ELSE}
  338. repeat
  339. a1:=read_item;
  340. if DT=eof_desk then begin ER_LEX(4);goto 99;end;
  341. if (DT<>end_list) then lconc(result,a1);
  342. until DT=end_list;
  343. {$ENDIF}
  344. DT:=complex_desk;
  345. end;
  346. end_tree : begin ER_LEX(5); goto 99; end;
  347. end_list : begin end; {immodiately returns to the upper level}
  348. name_obj: begin
  349. temp_res:=read_item(dum);
  350. if DT=end_list then begin ER_LEX(6); goto 99; end;
  351. if DT=eof_desk then begin ER_LEX(7); goto 99; end;
  352. result :=read_item(dum);
  353. if DT=end_list then begin ER_LEX(6); goto 99; end;
  354. if DT=eof_desk then begin ER_LEX(8); goto 99; end;
  355. if result<>NULL then begin
  356. points(result,x.sa);
  357. if (x.smld^.dtype=listmain)or(x.smtd^.dtype=treemain)
  358. then x.smtd^.name:=temp_res;
  359. end;
  360. DT:=complex_desk;
  361. end;
  362. eof_desk:begin end; { returns to the upper level }
  363. {$IFDEF xSUN}
  364. OTHERWISE
  365. {$ELSE}
  366. ELSE
  367. {$ENDIF}
  368. begin ER_LEX(9);goto 99; end { impossible value }
  369. end; { case }
  370. read_item:=result;
  371. 99: end; { Procedure Read_item }
  372. Procedure READLINE;
  373. {***************************}
  374. { sets new values for "s" and "i" global variables }
  375. label 99;
  376. begin
  377. i:=1; { In any case, so; only here it is initialized.
  378. it is column number.
  379. Variable "s" is string only for speed purposes;
  380. It plays exactly as Packed array of char,
  381. never used as whole and the Length byte is never used }
  382. Linenumber:=linenumber+1;
  383. line_byte_number:=line_byte_number+old_line_length;
  384. case read_mode of
  385. 1:
  386. begin
  387. if eof(inpfile) then begin s[1]:=endfile_code;close(inpfile);end
  388. else
  389. begin readln(inpfile,s);s[length(s)+1]:=new_line_code;
  390. old_line_length:=length(s);
  391. end;
  392. end;
  393. 2:
  394. begin
  395. if PTR1.nel=0 then begin s[1]:=endfile_code;goto 99;end;
  396. if PTR1.cel=0
  397. then begin
  398. s[1]:=new_line_code;
  399. old_line_length:=0;
  400. end
  401. else begin
  402. Pointr(PTR1.cel,X.sa);
  403. with x.sad^ do begin
  404. if (dtype=atom)or (dtype=idatom)or
  405. (dtype=fatom)or (dtype=tatom)or (dtype=keyword)
  406. then begin
  407. pointa(name,bl801,k);
  408. for kk:=1 to k do s[kk]:=bl801[kk];
  409. old_line_length:=k;
  410. s[k+1]:=new_line_code;
  411. end
  412. else
  413. begin
  414. s[1]:=new_line_code; { other objects are ignored }
  415. old_line_length:=0;
  416. end
  417. end; { with }
  418. end; { <>0 }
  419. NEXT(PTR1);
  420. end; {=2}
  421. 3:
  422. begin
  423. {$IFDEF RAMSCAN}
  424. k:=0;
  425. repeat c1:=char(Pointer(ABSPNT)^);
  426. write(':',ord(c1));
  427. inc{1}(longint(ABSPNT));
  428. inc{1}(k);
  429. s[k]:=c1;
  430. until (c1=new_line_code)or(k=126);
  431. if (k=126) then
  432. begin
  433. writeln('SCANNER DIRECT ACCESS WARNING !');
  434. s[127]:=new_line_code;
  435. end;
  436. old_line_length:=k;
  437. {$ENDIF}
  438. end;
  439. 4: begin
  440. {$IFDEF WIN}
  441. (* reads by-byte from global memory stream *)
  442. s:='';
  443. k:=0;
  444. while true do begin
  445. byte(c1):=SM^.readbyte;
  446. if c1=#13 then begin (* eoln symbol reached *)
  447. old_line_length:=k;
  448. (* this variable must refer to last symbol before eoln *)
  449. s[k+1]:=new_line_code;Exit;end;
  450. if c1=#26 then begin (* end of file reached *)
  451. old_line_length:=k;
  452. inc(k);
  453. s[k]:=endfile_code;
  454. SM^.Close;Dispose(SM,Done);Exit;end;
  455. if c1<>#10 then
  456. begin
  457. Inc(k);s[k]:=c1;inc(byte(s[0])); (* adds one character to string *)
  458. end;
  459. end;
  460. {$ENDIF}
  461. end;
  462. end; (* case *)
  463. 99:;
  464. end;
  465. Procedure PutAtom(j:integer);
  466. {***********************}
  467. begin
  468. PutIt(atom,j);
  469. end;
  470. Procedure PutIt(dd:descriptortype;j:integer);
  471. {***********************}
  472. begin
  473. if j>80 then begin DT:=Keyword;ER_LEX(10);j:=80; end else DT:=dd;
  474. putatm(s[i],j,AADR);
  475. saved_coord:=Getcoord;
  476. Tokennumber:=Tokennumber+1;
  477. end;
  478. Procedure PutIdent(j:integer);
  479. {***********************}
  480. begin
  481. PutIt(idatom,j);
  482. end;
  483. Procedure PutFloat(j:integer;digits_before_dot,digits_after_dot:integer);
  484. {***********************}
  485. var REA_VAL:real; ii:integer;
  486. AR:array[1..sizeof(real)+2]of char;
  487. begin
  488. (* Val2(Copy(s,i,j),REA_VAL,ii ignored ); *)
  489. system.Val(Copy(s,i,j),REA_VAL,ii);
  490. if ii<>0 then
  491. begin
  492. if j>80 then j:=80;
  493. DT:=keyword; (* WRONG REAL CONSTANT *)
  494. putatm(s[i],j,aadr);
  495. end
  496. else
  497. begin
  498. ii:=sizeof(real)+2;
  499. for j:=1 to ii-2 do
  500. AR[j]:=REAL_CHAR(REA_VAL)[j];
  501. AR[ii-1]:=chr(digits_before_dot);
  502. AR[ii] :=chr(digits_after_dot);
  503. putatm(AR[1],ii,AADR);
  504. DT:=fatom;
  505. end;
  506. (* writeln(' BEF, AFT =',digits_before_dot,' ',digits_after_dot); *)
  507. saved_coord:=Getcoord;
  508. inc{1}(Tokennumber);
  509. end;
  510. Procedure PutNumber;
  511. {***********************}
  512. begin
  513. DT:=number;
  514. saved_coord:=Getcoord;
  515. inc{1}(Tokennumber);
  516. end;
  517. Procedure PutStr;
  518. {***********************}
  519. begin
  520. if str_constlen>80 then
  521. begin
  522. str_constlen:=80;
  523. DT:=keyword; (* TOO LONG STRING CONSTANT *)
  524. end
  525. else
  526. DT:=TATOM;
  527. putatm(Str_const[1],Str_constlen, AADR);
  528. in_string:=false;
  529. Tokennumber:=Tokennumber+1;
  530. end;
  531. Procedure PutStr2;
  532. {***********************}
  533. begin
  534. if str_constlen>80 then str_constlen:=80;
  535. putatm(Str_const[1],Str_constlen, AADR);
  536. DT:=keyword;
  537. in_string:=false;
  538. Tokennumber:=Tokennumber+1;
  539. end;
  540. { this procedure used only for Modula2 or C-style string constants like 'x', -
  541. to show difference between 'x' and "x".
  542. Access via #_KEYWORD built_in rule is possible.
  543. When input use 'm+p-' !
  544. When output such tokens you should write
  545. IF #_KEYWORD($X) -> OUT <] @ '"' $X '"' ELSIF T-> OUT <] $X FI;
  546. }
  547. Procedure Scaner
  548. {*****************************************}
  549. (mode_parm:integer;
  550. {1=from file,2=list of strings,3=absulute address}
  551. filename:string80;
  552. options_str:string80;
  553. var rez:a; { result; set to NULL if input is absent }
  554. var erlist_parm:a; { error number list; set to NULL if no errors }
  555. strlist:a; { list of atoms }
  556. segm,ofs:longint); { parts of absolute address }
  557. LABEL 1,99;
  558. {INNER FUNCTION}
  559. Function Setop(c:char):boolean;
  560. begin
  561. Setop:=false;
  562. {$IFDEF xSUN}
  563. k:=Index(options_str,c);
  564. {$ELSE}
  565. k:=Pos(c,options_str);
  566. {$ENDIF}
  567. if k<>0 then
  568. if options_str[k+1]<>'-' then Setop:=true;
  569. end;
  570. begin
  571. read_mode:=mode_parm ; { save for global use }
  572. rez:=NULL;
  573. erlist_parm:=NULL;
  574. ERRLIST { global }:=NULL;
  575. { INITIALIZES ALL OPTIONS }
  576. { DEFAULTS }
  577. options_str:=options_str+'D-C-P+p+m-U+S+O+s-t-L-A+R+Y-B-N-';
  578. { to every option su,me default value should be given,
  579. otherwise the flag remains uninitialized }
  580. {ERRORS}
  581. screen_errors :=Setop('S');
  582. collect_errors:=Setop('O');
  583. {COMMENTS}
  584. ada_comment :=Setop('D');
  585. c_comment :=Setop('C');
  586. pascal_comment:=Setop('P');
  587. {STRING CONSTANTS}
  588. pascal_string :=Setop('p');
  589. modula_string :=Setop('m');
  590. to_uppercase :=Setop('U');
  591. {COORDINATE}
  592. string_coord :=Setop('s');
  593. token_coord :=Setop('t');
  594. row80_coord :=Setop('R');
  595. byte_coord :=Setop('Y');
  596. mark_byte_coord:=Setop('B');
  597. mark_only_coord:=Setop('N');
  598. {LANGUAGE_SPECIFIC LEXICS}
  599. c_lexics :=Setop('L');
  600. pascal_lexics :=Setop('A');
  601. { INITIALIZES LANGUAGE-SPECIFIC SETTINGS }
  602. SETLEXICS;
  603. { INITIALIZES "SESSION" FLAGS (alf order)}
  604. coord_mark:=0;
  605. in_comment:=false;
  606. in_string :=false;
  607. is_2quote :=false;
  608. last_mark_byte_number:=0;
  609. line_byte_number:=0;
  610. linenumber:=0;
  611. old_line_length :=0;
  612. str_constlen:=0;
  613. tokennumber:=0;
  614. { INITIALIZES PHYSICAL LEVEL READING }
  615. case read_mode of
  616. 1: begin { READ FROM FILE }
  617. if not existfile(filename)then begin Rez:=0;goto 1;end;
  618. {$IFDEF xSUN}
  619. Reset(inpfile,filename);
  620. {$ELSE}
  621. Assign(inpfile,filename);
  622. Reset(inpfile);
  623. {$ENDIF}
  624. readline; { reads first line of file }
  625. end;
  626. 2:
  627. begin { READ FROM LIST }
  628. if strlist=NULL then begin Rez:=0; goto 99; end;
  629. First(strlist,PTR1); { sets global list-pointer PTR1 }
  630. if (PTR1.ptrtype<>ptrlist) or
  631. (PTR1.nel=0) then begin Rez:=0; goto 99; end;
  632. readline; { reads line from current list-pointer position }
  633. end;
  634. 3:
  635. begin { READ FROM MS-DOS RAM MEMORY }
  636. {$IFDEF RAMSCAN}
  637. { Normalizing memory address }
  638. { Is it necessary for Windows model ? }
  639. { segm:=segm+ofs div 16;
  640. ofs:=ofs mod 16; }
  641. ABSPNT:=system.Ptr(segm,ofs); { This place is DOS-dependent !!! }
  642. if char (ABSPNT^) <> new_line_code
  643. then begin Rez:=0; goto 99; end;
  644. ABSPNT:=Pointer(longint(ABSPNT)+1);
  645. readline;
  646. {$ENDIF}
  647. end;
  648. 4: begin
  649. {$IFDEF WIN}
  650. New(SM,Init);
  651. SM^.open(segm);
  652. if (SM^.GETSIZE>128000)or(SM^.GETSIZE<0) THEN
  653. begin
  654. MESSAGEBOX(0,'Wrong handle to LDT stream',nil,16);
  655. rez:=0; goto 99; end
  656. else if (SM^.GETSIZE=0) then
  657. begin
  658. MESSAGEBOX(0,'Empty LDT stream',nil,16);
  659. rez:=0; goto 99; end
  660. else
  661. begin
  662. readline;
  663. end;
  664. {$ENDIF}
  665. end;
  666. end; (* case *)
  667. READ_FILE(REZ); { Main call ... }
  668. erlist_parm:=ERRLIST {global} ;
  669. 1:;99:;
  670. end;{ procedure Scaner }
  671. Procedure SETLEXICS;
  672. {***********************}
  673. var j:integer;
  674. begin
  675. {$IFDEF MIF}
  676. as['<']:=is_control;
  677. as['>']:=is_control;
  678. Exit;
  679. {$ENDIF}
  680. if pascal_lexics then begin { changes in standard, necessary for Pascal}
  681. as['{']:=is_special;
  682. as['#']:=is_special;
  683. as['$']:=is_special;
  684. as['%']:=is_special;
  685. as['''']:=is_special;
  686. as['(']:=is_special;
  687. two_char_symbols[1]:=':=';
  688. two_char_symbols[2]:='<=';
  689. two_char_symbols[3]:='>=';
  690. two_char_symbols[4]:='**';
  691. two_char_symbols[5]:='..';
  692. two_char_symbols[6]:='<>';
  693. two_char_symbols_num:=6;
  694. set_of_second_of_two:=['=','*','.','>'];
  695. for j:=1 to two_char_symbols_num do
  696. as[two_char_symbols[j][1]]:=is_first_of_two;
  697. if modula_string then
  698. begin
  699. as['"']:=is_special;
  700. as['{']:=is_printable;
  701. end
  702. end
  703. else
  704. if c_lexics then begin
  705. as['_']:=is_letter;
  706. as['$']:=is_letter;
  707. two_char_symbols[1]:='->';
  708. two_char_symbols[2]:='++';
  709. two_char_symbols[3]:='--';
  710. two_char_symbols[4]:='>>';
  711. two_char_symbols[5]:='<<';
  712. two_char_symbols[6]:='==';
  713. two_char_symbols[7]:='+=';
  714. two_char_symbols[8]:='*=';
  715. two_char_symbols[9]:='-=';
  716. two_char_symbols[10]:='/=';
  717. two_char_symbols[11]:='%=';
  718. two_char_symbols[12]:='&=';
  719. two_char_symbols[13]:='^=';
  720. two_char_symbols[14]:='|=';
  721. two_char_symbols[15]:='!=';
  722. two_char_symbols_num:=15;
  723. set_of_second_of_two:=['>','+','-','<','='];
  724. for j:=1 to two_char_symbols_num do
  725. as[two_char_symbols[j][1]]:=is_first_of_two;
  726. as['<']:=is_special; { used to process <<= }
  727. as['>']:=is_special; { used to process >>= }
  728. as['/']:=is_special; { used to process /* }
  729. { otherwise isa:=is_first_of_two
  730. is assigned ! }
  731. as['''']:=is_special;
  732. as['"']:=is_special;
  733. end;
  734. end;
  735. Procedure StrAdd(c:char);
  736. {***********************}
  737. begin
  738. inc{1}(Str_constlen);
  739. if Str_constlen<=80 then Str_const[Str_constlen]:=c;
  740. end;
  741. Procedure StrBegin;
  742. {******************}
  743. begin
  744. saved_coord:=GetCoord;
  745. { It will be used when PutStr works and TOKEN EXITs - in READ_ITEM}
  746. in_string:=true;
  747. Str_constlen:=0;
  748. end;
  749. Procedure Token;
  750. {********************}
  751. label 1,99;
  752. var J,i_saved:integer; { positions }
  753. var digits_before_dot,digits_after_dot:integer;
  754. begin
  755. { at beginning time
  756. "i" - is already set to character in string "s";
  757. array "as" is already initialized;
  758. coordinate mode "coord_mode" is already known;
  759. }
  760. with b123 do begin { variant record ,b1=b2[1]=b3[1]; b2[2]=b3[2] }
  761. 1: { we return to this label if token is not ready still }
  762. b1:=s[i];
  763. isa:=as[b1]; { type of this character }
  764. if isa=is_control
  765. then begin {1}
  766. if b1=new_line_code
  767. then
  768. begin
  769. Readline; { skips to next line, sets new "s" and "i" }
  770. if in_string then begin ER_LEX(11); PutStr2; goto 99; end;
  771. { ERROR= end of line appears in string constant }
  772. goto 1;
  773. end
  774. else
  775. if b1=endfile_code
  776. then
  777. begin
  778. if in_comment then ER_LEX(12);
  779. { ERROR = end of file appears in comment }
  780. DT:=eof_desk;
  781. goto 99;
  782. end
  783. else
  784. { others are control characters; }
  785. { they set "DT" field and then form Rigal list/tree structure }
  786. {$IFDEF MIF}
  787. if not in_string then
  788. {$ENDIF}
  789. begin{2}
  790. if in_comment then begin ER_LEX(13); in_comment:=false; end;
  791. { ERROR = control char in comment }
  792. if in_string then begin ER_LEX(14); PutStr2; goto 99;
  793. { will take control character next time }
  794. end;
  795. { ERROR = control char in string }
  796. DT:=Cont_char_to_dt(b1);
  797. I:=I+1;
  798. If (DT=set_coord)and(as[s[i]]=is_digit)
  799. { this control character sets coordinate to value given in input }
  800. then begin
  801. Coord_mark:=TAKE_DIGITS(J);
  802. Last_mark_byte_number:=line_byte_number+i;
  803. I:=I+J;
  804. goto 1; { DOES NOT RETURNS ! }
  805. end;
  806. goto 99;
  807. end; {2}
  808. {NEVER HERE}
  809. end; {1}
  810. { all the following executes AFTER check of is_control }
  811. {b3[1]:=s[i];} { this character }
  812. b3[2]:=s[i+1]; { next }
  813. b3[3]:=s[i+2]; { next }
  814. if in_comment { CHECK FOR END OF COMMENT }
  815. {THIS part never appears in MIF input, since there is no in_comment status}
  816. then begin {1}
  817. { we are in comment;
  818. here only comments that have some special end mark are processed }
  819. if pascal_comment then begin {2}
  820. if b1='}' then begin inc{3}(i); in_comment:=false; goto 1;end;
  821. if b2='*)' then begin inc{4}(i,2); in_comment:=false; goto 1; end;
  822. end {2}
  823. else
  824. if c_comment then begin {2}
  825. if b2='*/' then begin inc{4}(i,2); in_comment:=false; goto 1;end;
  826. end {2}
  827. ;
  828. inc{3}(i);
  829. goto 1;
  830. end;{1}
  831. {$IFDEF MIF}
  832. if in_string then begin{1}
  833. if b1='''' then begin PutStr; {jncx}inc(i); goto 99; end
  834. {End if MIF constant}
  835. else
  836. if str_constlen=10 then begin PutStr;in_string:=true;{jnc}inc(str_constlen);
  837. {Becomes 11; no shift in input performed, ends part of constant}
  838. goto 99; end
  839. else
  840. if str_constlen=11 then begin
  841. {immediately afrer previous case; starts next part of constant}
  842. StrBegin;{sets str_constlen to 0}
  843. is_2quote:=false;
  844. StrAdd(b1);
  845. {jncx}inc(i);
  846. goto 1; end
  847. else begin
  848. StrAdd(b1); {jncx}inc(i); goto 1; end ; {Normal case}
  849. end;{1}
  850. if b1='`' then begin StrBegin;is_2quote:=false;{jncx}inc(i);goto 1;end;
  851. {Starts new MIF text constant}
  852. if b1='#' then begin Readline; goto 1; end; {MIF comment}
  853. {$ELSE}
  854. if in_string
  855. { CHECK FOR END OF STRING or SOMETHING SPECIAL IN STRING }
  856. then begin {1}
  857. if pascal_string then begin {2}
  858. if b1='''' then begin {3}
  859. if b2[2]='''' then begin StrAdd('''');
  860. inc{4}(i,2);
  861. goto 1;end
  862. { this allow to save '''' as '' }
  863. else begin Putstr;inc{3}(i);goto 99;end;
  864. end {3}
  865. else begin {3}
  866. StrAdd(b1);
  867. inc{3}(i);
  868. goto 1;
  869. end {3}
  870. end; {2}
  871. if modula_string then begin {2}
  872. if is_2quote and (b1='"') then
  873. begin
  874. if b2[2]='"' then begin StrAdd('"');Inc{4}(i,2);goto 1;end
  875. else begin Putstr;inc{3}(i);goto 99;end;
  876. end;
  877. if not(is_2quote) and (b1='''') then
  878. begin Putstr2;inc{3}(i);goto 99;end
  879. else begin {3}
  880. if b1='\' then
  881. begin
  882. StrAdd(b1);
  883. StrAdd(b2[2]);
  884. { this allows to save \? as ? even if \" appears}
  885. inc{4}(i,2);
  886. goto 1;
  887. end;
  888. StrAdd(b1);
  889. inc{3}(i);
  890. goto 1;
  891. end; {3}
  892. end {2}
  893. end ; {1}
  894. {$ENDIF}
  895. { all the following executes AFTER check for in_string & in_comment }
  896. if isa=is_special then begin{1}
  897. {THIS part never appears in MIF input, since there is no is_special}
  898. if pascal_comment then begin{2}
  899. if (b2='(*')
  900. then begin in_comment:=true;inc{4}(i,2);goto 1;end;
  901. if (b1='{')and(b2[2]<>'$')
  902. then begin in_comment:=true;inc{3}(i) ;goto 1;end;
  903. end{2}
  904. else
  905. if c_comment then begin{2}
  906. if b2='/*' then begin in_comment:=true;inc{4}(i,2);goto 1;end;
  907. end {2}
  908. else
  909. if ada_comment then begin {2}
  910. if b2='--' then begin Readline;goto 1; end;
  911. end {2}
  912. ;
  913. if pascal_string then begin {2}
  914. if b1='''' then begin StrBegin;is_2quote:=false;inc{3}(i);goto 1;end;
  915. end {2}
  916. else
  917. if modula_string then begin {2}
  918. if b1='''' then begin StrBegin;is_2quote:=false;inc{3}(i);goto 1;end;
  919. if b1='"' then begin StrBegin;is_2quote:=true;inc{3}(i);goto 1;end;
  920. end {2}
  921. ;
  922. if pascal_lexics then begin {2}
  923. { SPECIALLY TAKES TURBO PASCAL DIRECTIVE-COMMENTS }
  924. if (b2='{$') then begin {3}
  925. j:=0;
  926. repeat inc{3}(j);
  927. until (s[i+j]='}')or(as[s[i+j]]=is_control);
  928. if s[i+j]='}' then inc{3}(j);
  929. PutAtom(j);
  930. inc{4}(i,j);
  931. goto 99;
  932. end; {3}
  933. { ADDITIONAL SYMBOLS }
  934. if (b1='#')or(b1='$')or(b1='%')
  935. then begin {3}
  936. inc{3}(i);
  937. J:=TAKE_LETTERS; { starts from i-th position }
  938. i:=i-1;
  939. PutAtom(J+1); { takes token from i-th position }
  940. inc{4}(i,j+1);
  941. goto 99;
  942. end; {3}
  943. end{2}
  944. else
  945. if c_lexics then begin {2} { SYMBOLS= > < }
  946. { SPECIAL CASES FOR C LANGUAGE }
  947. if (b3='<<=')or(b3='>>=') then begin
  948. PutAtom(3); inc{4}(i,3); goto 99;
  949. end;
  950. isa:=is_first_of_two;
  951. { !! in C case <<, >>, >=, <= will be tested further
  952. in TWO_char_symbols section, hence we go to there
  953. using assignment to "ISA" }
  954. end {2}
  955. end; {1}
  956. if isa=is_space then begin inc{3}(i); goto 1;end;
  957. if isa=is_letter then begin
  958. J:=TAKE_LETTERS;
  959. PutIdent(J);
  960. I:=I+J;
  961. goto 99;
  962. end;
  963. if isa=is_digit then begin {1}
  964. if c_lexics then begin {2}
  965. { ADDITIONAL SYMBOLS; HERE hex and octal numbers are saved as
  966. normal ATOMs }
  967. if (b1='0')
  968. then begin {3}
  969. inc{3}(i);
  970. J:=TAKE_LETTERS; { starts from i-th position }
  971. i:=i-1;
  972. PutAtom(J+1); { takes token from i-th position }
  973. inc{4}(i,j+1);
  974. goto 99;
  975. end; {3}
  976. end; {2}
  977. i_saved:=i; { remember starting position }
  978. AADR:=TAKE_DIGITS(J);
  979. digits_before_dot:=j;
  980. if (j<10)and
  981. ( ((s[i+j]='.')and(s[i+j+1]='.'))
  982. or
  983. (not(s[i+j]in['e','E','.'])) )
  984. then begin PutNumber;inc{4}(i,j);goto 99;end;
  985. inc{4}(i,j);
  986. if s[i]='.' then begin
  987. inc{3}(i);
  988. AADR:=TAKE_DIGITS(J);
  989. digits_after_dot:=j;
  990. inc{4}(i,j);
  991. end;
  992. if s[i] in ['e','E'] then
  993. begin
  994. digits_before_dot:=0;
  995. digits_after_dot:=0;
  996. inc{3}(i);
  997. if s[i] in ['+','-'] then inc{3}(i);
  998. AADR:=TAKE_DIGITS(J);
  999. inc{4}(i,j);
  1000. end;
  1001. J:=i-i_saved;
  1002. i:=i_saved; { to set "i" to starting position }
  1003. PutFloat(J,digits_before_dot,digits_after_dot);
  1004. inc{4}(i,j);
  1005. goto 99;
  1006. end;{1}
  1007. if isa=is_first_of_two then begin {1}
  1008. if b2[2] in set_of_second_of_two then
  1009. for j:=1 to two_char_symbols_num do
  1010. if b2=two_char_symbols[j] then begin
  1011. PutAtom(2);I:=I+2;goto 99;
  1012. end
  1013. end;{1}
  1014. PutAtom(1);inc{3}(i);
  1015. end; { with}
  1016. 99:;
  1017. end; { Procedure }
  1018. Function TAKE_LETTERS:integer;
  1019. {**************************************}
  1020. label 99;
  1021. { Reads only letters, digits and underscores.
  1022. returns number of characters read }
  1023. var JJ:integer; c:char;
  1024. begin
  1025. JJ:=0;
  1026. while true do begin
  1027. c:=s[i+jj];
  1028. isa:=as[c];
  1029. if (isa=is_letter)or(isa=is_digit)or(isa=is_underscore)
  1030. then begin
  1031. if to_uppercase then s[i+jj]:=upcase_tab[c];
  1032. inc{3}(jj)
  1033. end
  1034. else begin
  1035. TAKE_LETTERS:=jj;goto 99;
  1036. end;
  1037. end;
  1038. 99:;
  1039. end;
  1040. Function TAKE_DIGITS(var jj:integer):longint;
  1041. {************************************************}
  1042. label 99;
  1043. var summator:longint; c:char;
  1044. begin
  1045. JJ:=0; summator:=0;
  1046. while true do begin
  1047. c:=s[i+jj];
  1048. if as[c]=is_digit then
  1049. begin summator:=summator*10+ord(c)-ord('0');inc{3}(jj);end
  1050. else begin TAKE_DIGITS:=summator;goto 99;end;
  1051. end;
  1052. 99:;
  1053. end;
  1054. (*begin*)
  1055. { PRESS F8 when debugging ! }
  1056. (*INITIALIZE_SCAN_VARIABLES;*)
  1057. end.