SCANALL.PAS 43 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554
  1. (* changes:
  2. 11-nov-92
  3. Now in m+ mode "AB""CD" is accespted as AB"CD value
  4. Earlier it were two values "AB" and "CD"
  5. mar-93
  6. Now MIF conditional definition is available.
  7. may-93
  8. Now TEX scaner is available.
  9. T+ defines LaTeX lexical rules
  10. and value of new "tex_lexics" boolean variable
  11. @+ defines (additionaly to T+) .sty - file mode of LaTeX,
  12. when @ characters are allowed in command names
  13. .TEX file have to be with 'P-U-p-A-T+'
  14. .STY file have to be with 'P-U-p-A-T+@+'
  15. *)
  16. {$DEFINE RAMSCAN}
  17. {$DEFINE SCANU}
  18. { Rigal integrated environment (c) 1991 Software house Riga }
  19. { }
  20. { }
  21. {$IFDEF SCANU}
  22. {$IFDEF MIF}
  23. Unit Scanmif;
  24. {$ELSE}
  25. Unit Scan;
  26. {$ENDIF}
  27. interface
  28. uses
  29. {$IFDEF WIN} tmemstrm,winprocs, {$ENDIF}
  30. define;
  31. { Separate scanner for input strings and output Rigal objects }
  32. {$IFDEF MIF}
  33. Procedure INITIALIZE_SCAN_VARIABLES_mif;
  34. Procedure Scaner_mif
  35. {$ELSE}
  36. Procedure INITIALIZE_SCAN_VARIABLES;
  37. Procedure Scaner
  38. {$ENDIF}
  39. {*****************************************}
  40. (mode_parm:integer;
  41. {1=from file,2=list of strings,3=absulute address,
  42. 4=stream memory handle }
  43. filename:string80;
  44. options_str:string80;
  45. var rez:a; { result; set to NULL if input is absent }
  46. var erlist_parm:a; { error number list; set to NULL if no errors }
  47. strlist:a; { list of atoms }
  48. segm,ofs:longint); { parts of absolute address }
  49. implementation
  50. uses defpage,nef,poutlexu,doutu;
  51. {$ELSE}
  52. #include "define.p"
  53. #include "defpage.inc"
  54. #include "nef2.inc"
  55. #include "outs.inc"
  56. #include "scan.inc"
  57. {$ENDIF}
  58. {VARIABLES}
  59. const maxline=255;
  60. type bigstring=
  61. {$IFDEF xSUN}
  62. varying array[127] of char;
  63. {$ELSE}
  64. array[1..maxline]of char ;
  65. {$ENDIF}
  66. var a1: a; { global variable for ONLY LOCAL use }
  67. {aa1 : aa ;} { --"-- }
  68. X : MPD; { --"-- }
  69. bl801 : bl80; { -- " -- }{ array[1..80] of char}
  70. k,kk:integer; { -"- }
  71. c1:char;
  72. saved_coord, { where current token began }
  73. coord_mark , { what was set by coordinate marker }
  74. line_byte_number , { number of totally read bytes till beginning
  75. of current line }
  76. last_mark_byte_number , { number of totally read bytes till
  77. last coordinate marker }
  78. old_line_length { length of the current line,
  79. used only for incrementation of "lyne_byte_number"}
  80. :word;
  81. DT:descriptortype; { type of last read token or control character }
  82. AADR:a; { A-space address of last read token }
  83. Linenumber:longint; { Current line number }
  84. Tokennumber:longint;{ Current token number }
  85. Errlist :a; { List of numbers of errors }
  86. Read_mode:integer; { 1,2,3 }
  87. PTR1 : PTR; { List pointer when read from list of atoms }
  88. {$IFDEF RAMSCAN}
  89. ABSPNT : Pointer;{ RAM pointer when read from RAM }
  90. {$ENDIF}
  91. inpfile : text;
  92. var
  93. c_lexics,
  94. pascal_lexics,
  95. sty_lexics,
  96. tex_lexics,
  97. row80_coord,
  98. byte_coord,
  99. mark_byte_coord,
  100. mark_only_coord,
  101. string_coord,
  102. char_coord,
  103. token_coord,
  104. collect_errors,
  105. screen_errors,
  106. to_uppercase,
  107. pascal_comment,
  108. c_comment,
  109. ada_comment,
  110. pascal_string,
  111. modula_string:boolean;
  112. const new_line_code=Chr(13); { Chr(0 is allowed too }
  113. endfile_code =Chr(26);
  114. var s:bigstring;
  115. { s_for_val:bigstring;} { added 17-FEB-92 }
  116. i:integer;
  117. type is_something=
  118. (is_control, is_letter, is_digit, is_underscore, is_printable,
  119. is_special, is_space , is_first_of_two);
  120. type pair=packed array[1..2]of char;
  121. var as:array[char]of is_something;
  122. isa : is_something;
  123. upcase_tab:array[char]of char;
  124. set_of_second_of_two:set of char;
  125. two_char_symbols_num:integer;
  126. two_char_symbols:array[1..30]of pair;
  127. b123 : record
  128. case integer of
  129. 1:(b1:char);
  130. 2:(b2:packed array[1..2]of char);
  131. 3:(b3:packed array[1..3]of char);
  132. end;
  133. var in_comment,in_string : boolean;
  134. var is_2quote : boolean; { if in_string then it is possible }
  135. var Str_constlen:longint;
  136. Str_const : string80; { array of char is allowed too }
  137. {===== SUN VERSION ===}
  138. {$IFDEF SUN}
  139. procedure inc{1}(var xxx:longint);
  140. begin xxx:=xxx+1; end;
  141. procedure inc{2}(var xxx:longint;yyy:longint);
  142. begin xxx:=xxx+yyy; end;
  143. procedure inc{3}(var xxx:integer);
  144. begin xxx:=xxx+1; end;
  145. procedure inc{4}(var xxx:integer;yyy:integer);
  146. begin xxx:=xxx+yyy; end;
  147. {$ENDIF}
  148. {=====}
  149. function CONT_CHAR_TO_DT(c:char):descriptortype;forward;
  150. function Getcoord:word;forward;
  151. procedure ER_LEX(er_number:integer);forward;
  152. {Procedure INITIALIZE_SCAN_VARIABLES;forward;}
  153. Procedure READ_FILE(var read_file_rez:a);forward;
  154. Function READ_ITEM(var pghead:a):a;forward; {NEW!}
  155. Procedure READLINE;forward;
  156. Procedure PutAtom(j:integer);forward;
  157. Procedure PutIt(dd:descriptortype;j:integer);forward;
  158. Procedure PutIdent(j:integer);forward;
  159. Procedure PutFloat(j:integer;digits_before_dot,digits_after_dot:integer);forward;
  160. Procedure PutNumber;forward;
  161. Procedure PutStr(dd:descriptortype);forward;
  162. {Procedure Scaner;forward;}
  163. Procedure SETLEXICS;forward;
  164. Procedure StrAdd(c:char);forward;
  165. Procedure StrBegin;forward;
  166. Procedure Token;forward;
  167. Function TAKE_DIGITS(var jj:integer):longint;forward;
  168. Function TAKE_LETTERS:integer;forward;
  169. { uses global string, received from Readline procedure,
  170. produces global attributes of new one token read.
  171. Some variables also should be initialized if by Initial_token
  172. procedure at the start of the WHOLE programm }
  173. { This procedure is oriented to Turbo Pascal language }
  174. { Procedure call graph follows :
  175. /self\
  176. USEPAS -> SCAN -> READFILE -> READITEM -> ADDEL.nef
  177. -> LCONC.nef ER_LEX
  178. -> SETOPTIONS GETS1.defpage
  179. -> FIRST.nef TOKEN
  180. -> ASSIGN.system POINTR.defpage
  181. -> RESET.system LCONC.nef
  182. -> READLINE
  183. TOKEN -> ER
  184. -> READ_LETTERS_OR_DIGITS
  185. -> READ_DIGITS
  186. -> STRBEGIN
  187. -> PUTNUMBER
  188. -> StrAdd -> ER
  189. -> PUTSTR -> PUTATM.defpage
  190. -> PutAtom ->\
  191. -> PUTFLOAT -> PUTIT -> PUTATM.defpage
  192. -> PUTIDENT ->/
  193. }
  194. function CONT_CHAR_TO_DT(c:char):descriptortype;
  195. {**********************************************}
  196. begin
  197. Cont_char_to_dt:=descriptortype(c);
  198. {$IFDEF MIF}
  199. if c='<' then cont_char_to_dt:=start_list;
  200. if c='>' then cont_char_to_dt:=end_list;
  201. {$ENDIF}
  202. end;
  203. function Getcoord:word;
  204. {***********************}
  205. begin
  206. if row80_coord then Getcoord:=i+Linenumber*80
  207. else
  208. if mark_only_coord then Getcoord:=coord_mark
  209. else
  210. if mark_byte_coord then
  211. Getcoord:=coord_mark+
  212. (line_byte_number+i-last_mark_byte_number)
  213. else
  214. if byte_coord then Getcoord:=line_byte_number+i
  215. else
  216. if string_coord then Getcoord:=Linenumber
  217. else
  218. if token_coord then Getcoord:=Tokennumber
  219. else
  220. Getcoord:=0;
  221. end;
  222. procedure ER_LEX(er_number:integer);
  223. {***********************************}
  224. var er_atom:a;co:word;erm:string80;
  225. begin
  226. co:=Getcoord;
  227. if collect_errors then begin
  228. Gets1(er_atom,x.sa);
  229. with x.snd^ do begin
  230. dtype:=number;
  231. cord:=co;
  232. val:=er_number;
  233. end;
  234. lconc(errlist {global} ,er_atom);
  235. end;
  236. if screen_errors then
  237. begin
  238. case er_number of
  239. 1 : erm:=' unexpected end of file before end of tree';
  240. 2 : erm:=' unexpected end of file within tree branch ';
  241. 3 : erm:=' unexpected end of file within tree ';
  242. 4 : erm:=' unexpected end of file before end of list ';
  243. 5 : erm:=' unexpected end of tree ';
  244. 6 : erm:=' unexpected end of list or another control character';
  245. 7 : erm:=' unexpected end of file in name ';
  246. 8 : erm:=' unexpected end of file in named object ';
  247. 9 : erm:=' unexpected control character ';
  248. 10 : erm:=' too long atom ( more than 80 bytes ) ';
  249. 11 : erm:=' end of string constant not found in the current line';
  250. 12 : erm:=' end of file before end of comment';
  251. 13 : erm:=' control character within comment ';
  252. 14 : erm:=' control character within string constant';
  253. end;
  254. writeln('Lexical error :',erm);
  255. writeln('Line=',linenumber,' column=',i);
  256. end;
  257. end;
  258. {$IFDEF MIF}
  259. Procedure INITIALIZE_SCAN_VARIABLES_mif;
  260. {$ELSE}
  261. Procedure INITIALIZE_SCAN_VARIABLES;
  262. {$ENDIF}
  263. {***********************************}
  264. var c:char;
  265. begin
  266. for c:=Chr(0) to Chr(255) do upcase_tab[c]:=c;
  267. for c:=Chr(97) to Chr(122) do upcase_tab[c]:=Chr(ord(c)-32);{ASCII}
  268. (* for c:=Chr(160) to Chr(175) do upcase_tab[c]:=Chr(ord(c)-32); *){Russian}
  269. (* for c:=Chr(224) to Chr(239) do upcase_tab[c]:=Chr(ord(c)-60);*){Russian}
  270. { German for Windows }
  271. upcase_tab[chr(228)]:=chr(196);
  272. upcase_tab[chr(246)]:=chr(214);
  273. upcase_tab[chr(252)]:=chr(220);
  274. for c:=Chr(0) to Chr(31) do as[c]:=is_control;
  275. for c:=Chr(128) to Chr(255) do as[c]:=is_letter; { Russian and pseudographics }
  276. for c:=Chr(32) to Chr(127) do as[c]:=is_printable; { not used actually }
  277. as[new_line_code]:=is_control;
  278. as[' ']:=is_space;
  279. as[Chr(9)] :=is_space;
  280. { these are allowed to be First letter of identifiers }
  281. for c:='A'to'Z' do as[c]:=is_letter;
  282. for c:='a'to'z' do as[c]:=is_letter;
  283. for c:='0'to'9' do as[c]:=is_digit;
  284. { allowed to be non-first letter of odentifier }
  285. as['_']:=is_underscore;
  286. { All the rest settings - see procedure SETLEXICS }
  287. end;
  288. Procedure READ_FILE(var read_file_rez:a);
  289. {******************************}
  290. { reads whole input, produces list of items }
  291. label 99;
  292. var head:a;
  293. begin read_file_rez:=NULL;
  294. repeat a1:=read_item(head);
  295. if (dt=start_tree)or(dt=end_tree)or(dt=start_list)or
  296. (dt=end_list)or(dt=name_obj) then begin ER_LEX(6);goto 99;end;
  297. if DT<>eof_desk then
  298. begin
  299. if head<>NULL then lconc(read_file_rez,head);
  300. lconc(read_file_rez,a1);
  301. end;
  302. until DT=eof_desk;
  303. 99: end;
  304. Function READ_ITEM(var pghead:a):a;
  305. {********************}
  306. label 99;
  307. var aadr1:a; result,temp_res,dum,head:a;
  308. begin
  309. READ_ITEM:=NULL; { default value for exits with errors }
  310. pghead:=NULL;
  311. result:=NULL;
  312. TOKEN;
  313. case DT of
  314. atom,idatom,tatom,fatom,keyword:
  315. begin gets1(result,x.sa);
  316. with x.sad^ do begin
  317. dtype:=DT; cord:=saved_coord;
  318. name := AADR;
  319. end
  320. end;
  321. number:
  322. begin gets1(result,x.sa);
  323. with x.snd^ do begin
  324. dtype:=DT; cord:=saved_coord;
  325. val:= AADR; { is set in TOKEN .. is_digit }
  326. end;
  327. end;
  328. dummy: result:=NULL;
  329. start_tree:
  330. begin result:=NULL;
  331. repeat
  332. TOKEN;
  333. aadr1:=AADR; { to save }
  334. if (DT=idatom)or(DT=atom)or(DT=tatom)or(DT=keyword)
  335. { What is allowed selector in scaner input ?
  336. Normally - idatom only, but here
  337. atom is allowed too - for experiment purposes }
  338. then
  339. begin
  340. a1:=Read_item(dum);
  341. if DT=end_list then begin ER_LEX(6); goto 99; end;
  342. if DT=eof_desk then begin ER_LEX(1);goto 99;end;
  343. Addel3(result,aadr1,a1);
  344. end
  345. else if DT<>end_tree then begin ER_LEX(2);goto 99;end;
  346. ;
  347. until (DT=end_tree)or(DT=eof_desk);
  348. if DT=eof_desk then begin ER_LEX(3);goto 99;end;
  349. DT:=complex_desk; { to ignore analysis in upper level of
  350. recursion }
  351. end;
  352. start_list :
  353. begin
  354. result:=NULL;
  355. {$IFDEF MIF}
  356. pghead:=READ_ITEM(dum);
  357. repeat
  358. a1:=read_item(head);
  359. if DT=eof_desk then begin ER_LEX(4);goto 99;end;
  360. if (DT<>end_list) then
  361. begin
  362. if head<>NULL then lconc(result,head);
  363. lconc(result,a1);
  364. end;
  365. until DT=end_list;
  366. {$ELSE}
  367. repeat
  368. a1:=read_item(dum);
  369. if DT=eof_desk then begin ER_LEX(4);goto 99;end;
  370. if (DT<>end_list) then lconc(result,a1);
  371. until DT=end_list;
  372. {$ENDIF}
  373. DT:=complex_desk;
  374. end;
  375. end_tree : begin ER_LEX(5); goto 99; end;
  376. end_list : begin end; {immodiately returns to the upper level}
  377. name_obj: begin
  378. temp_res:=read_item(dum);
  379. if DT=end_list then begin ER_LEX(6); goto 99; end;
  380. if DT=eof_desk then begin ER_LEX(7); goto 99; end;
  381. result :=read_item(dum);
  382. if DT=end_list then begin ER_LEX(6); goto 99; end;
  383. if DT=eof_desk then begin ER_LEX(8); goto 99; end;
  384. if result<>NULL then begin
  385. points(result,x.sa);
  386. if (x.smld^.dtype=listmain)or(x.smtd^.dtype=treemain)
  387. then x.smtd^.name:=temp_res;
  388. end;
  389. DT:=complex_desk;
  390. end;
  391. eof_desk:begin end; { returns to the upper level }
  392. {$IFDEF xSUN}
  393. OTHERWISE
  394. {$ELSE}
  395. ELSE
  396. {$ENDIF}
  397. begin ER_LEX(9);goto 99; end { impossible value }
  398. end; { case }
  399. read_item:=result;
  400. 99: end; { Procedure Read_item }
  401. Procedure READLINE;
  402. {***************************}
  403. { sets new values for "s" and "i" global variables }
  404. label 99;
  405. begin
  406. i:=1; { In any case, so; only here it is initialized.
  407. it is column number.
  408. Variable "s" is string only for speed purposes;
  409. It plays exactly as Packed array of char,
  410. never used as whole and the Length byte is never used }
  411. Linenumber:=linenumber+1;
  412. line_byte_number:=line_byte_number+old_line_length;
  413. case read_mode of
  414. 1:
  415. begin
  416. if eof(inpfile) then begin s[1]:=endfile_code;close(inpfile);end
  417. else
  418. begin
  419. {$IFDEF STRING_READ}
  420. readln(inpfile,s);s[length(s)+1]:=new_line_code;
  421. old_line_length:=length(s);
  422. {$ELSE}
  423. old_line_length:=0;
  424. while not(eoln(inpfile)) do
  425. begin
  426. inc(old_line_length);
  427. if old_line_length<maxline-1 then
  428. read(inpfile,s[old_line_length])
  429. else
  430. begin
  431. writeln(' FATAL ERROR: Line ',linenumber,' too long !');
  432. old_line_length:=0;
  433. readln;
  434. end;
  435. end;
  436. readln(inpfile);
  437. s[old_line_length+1]:=new_line_code;
  438. {$ENDIF}
  439. end;
  440. end;
  441. 2:
  442. begin
  443. if PTR1.nel=0 then begin s[1]:=endfile_code;goto 99;end;
  444. if PTR1.cel=0
  445. then begin
  446. s[1]:=new_line_code;
  447. old_line_length:=0;
  448. end
  449. else begin
  450. Pointr(PTR1.cel,X.sa);
  451. with x.sad^ do begin
  452. if (dtype=atom)or (dtype=idatom)or
  453. (dtype=fatom)or (dtype=tatom)or (dtype=keyword)
  454. then begin
  455. pointa(name,bl801,k);
  456. for kk:=1 to k do s[kk]:=bl801[kk];
  457. old_line_length:=k;
  458. s[k+1]:=new_line_code;
  459. end
  460. else
  461. begin
  462. s[1]:=new_line_code; { other objects are ignored }
  463. old_line_length:=0;
  464. end
  465. end; { with }
  466. end; { <>0 }
  467. NEXT(PTR1);
  468. end; {=2}
  469. 3:
  470. begin
  471. {$IFDEF RAMSCAN}
  472. k:=0;
  473. repeat c1:=char(Pointer(ABSPNT)^);
  474. write(':',ord(c1));
  475. inc{1}(longint(ABSPNT));
  476. inc{1}(k);
  477. s[k]:=c1;
  478. until (c1=new_line_code)or(k=126);
  479. if (k=126) then
  480. begin
  481. writeln('SCANNER DIRECT ACCESS WARNING !');
  482. s[127]:=new_line_code;
  483. end;
  484. old_line_length:=k;
  485. {$ENDIF}
  486. end;
  487. 4: begin
  488. {$IFDEF WIN}
  489. (* reads by-byte from global memory stream *)
  490. k:=0;
  491. while true do begin
  492. if k > maxline-1 then begin
  493. writeln(' FATAL ERROR: Line too long !');
  494. k:=0;
  495. readln;
  496. end;
  497. byte(c1):=SM^.readbyte;
  498. if c1=#13 then begin (* eoln symbol reached *)
  499. old_line_length:=k;
  500. (* this variable must refer to last symbol before eoln *)
  501. s[k+1]:=new_line_code;Exit;end;
  502. if c1=#26 then begin (* end of file reached *)
  503. old_line_length:=k;
  504. inc(k);
  505. s[k]:=endfile_code;
  506. SM^.Close;Dispose(SM,Done);Exit;end;
  507. if c1<>#10 then
  508. begin
  509. Inc(k);s[k]:=c1;
  510. {inc(byte(s[0]));} (* adds one character to string *)
  511. end;
  512. end;
  513. {$ENDIF}
  514. end;
  515. end; (* case *)
  516. 99:;
  517. end;
  518. Procedure PutAtom(j:integer);
  519. {***********************}
  520. begin
  521. PutIt(atom,j);
  522. end;
  523. Procedure PutIt(dd:descriptortype;j:integer);
  524. {***********************}
  525. begin
  526. if j>80 then begin DT:=Keyword;ER_LEX(10);j:=80; end else DT:=dd;
  527. putatm(s[i],j,AADR);
  528. saved_coord:=Getcoord;
  529. Tokennumber:=Tokennumber+1;
  530. end;
  531. Procedure PutIdent(j:integer);
  532. {***********************}
  533. begin
  534. PutIt(idatom,j);
  535. end;
  536. Procedure PutFloat(j:integer;digits_before_dot,digits_after_dot:integer);
  537. {***********************}
  538. var REA_VAL:real; ii:integer;
  539. floatstring:string;
  540. AR:array[1..sizeof(real)+2]of char;
  541. begin
  542. (* Val2(Copy(s,i,j),REA_VAL,ii ignored ); *)
  543. floatstring:='';
  544. for k:=1 to j do floatstring:=floatstring+s[i+k-1];
  545. system.Val(floatstring,REA_VAL,ii);
  546. if ii<>0 then
  547. begin
  548. if j>80 then j:=80;
  549. DT:=keyword; (* WRONG REAL CONSTANT *)
  550. putatm(s[i],j,aadr);
  551. end
  552. else
  553. begin
  554. ii:=sizeof(real)+2;
  555. for j:=1 to ii-2 do
  556. AR[j]:=REAL_CHAR(REA_VAL)[j];
  557. AR[ii-1]:=chr(digits_before_dot);
  558. AR[ii] :=chr(digits_after_dot);
  559. putatm(AR[1],ii,AADR);
  560. DT:=fatom;
  561. end;
  562. (* writeln(' BEF, AFT =',digits_before_dot,' ',digits_after_dot); *)
  563. saved_coord:=Getcoord;
  564. inc{1}(Tokennumber);
  565. end;
  566. Procedure PutNumber;
  567. {***********************}
  568. begin
  569. DT:=number;
  570. saved_coord:=Getcoord;
  571. inc{1}(Tokennumber);
  572. end;
  573. Procedure PutStr(dd:descriptortype);
  574. {***********************}
  575. var iii:integer;
  576. begin
  577. if dd=KEYWORD then begin
  578. writeln('Erroneous string is:');
  579. for iii:=1 to str_constlen do write(str_const[iii]);
  580. writeln;
  581. readln;
  582. end;
  583. if str_constlen>80 then
  584. begin
  585. str_constlen:=80;
  586. DT:=keyword; (* TOO LONG STRING CONSTANT *)
  587. end
  588. else
  589. DT:=dd;
  590. putatm(Str_const[1],Str_constlen, AADR);
  591. in_string:=false;
  592. Tokennumber:=Tokennumber+1;
  593. {$IFDEF MIF}
  594. if Str_constlen=0 then DT:=dummy;
  595. { String constants of 0 length are converted to NULL}
  596. {$ENDIF}
  597. end;
  598. { Normally this procedure uses parameter TATOM ;
  599. this procedure used with parameter KEYWORD only for Modula2
  600. or C-style string constants like 'x', -
  601. to show difference between 'x' and "x".
  602. Access via #_KEYWORD built_in rule is possible.
  603. When input use 'm+p-' !
  604. When output such tokens you should write
  605. IF #_KEYWORD($X) -> OUT <] @ '"' $X '"' ELSIF T-> OUT <] $X FI;
  606. }
  607. {$IFDEF MIF}
  608. Procedure Scaner_mif
  609. {$ELSE}
  610. Procedure Scaner
  611. {$ENDIF}
  612. {*****************************************}
  613. (mode_parm:integer;
  614. {1=from file,2=list of strings,3=absulute address}
  615. filename:string80;
  616. options_str:string80;
  617. var rez:a; { result; set to NULL if input is absent }
  618. var erlist_parm:a; { error number list; set to NULL if no errors }
  619. strlist:a; { list of atoms }
  620. segm,ofs:longint); { parts of absolute address }
  621. LABEL 1,99;
  622. {INNER FUNCTION}
  623. Function Setop(c:char):boolean;
  624. begin
  625. Setop:=false;
  626. {$IFDEF xSUN}
  627. k:=Index(options_str,c);
  628. {$ELSE}
  629. k:=Pos(c,options_str);
  630. {$ENDIF}
  631. if k<>0 then
  632. if options_str[k+1]<>'-' then Setop:=true;
  633. end;
  634. begin
  635. read_mode:=mode_parm ; { save for global use }
  636. rez:=NULL;
  637. erlist_parm:=NULL;
  638. ERRLIST { global }:=NULL;
  639. { INITIALIZES ALL OPTIONS }
  640. { DEFAULTS }
  641. options_str:=options_str+'D-C-P+p+m-U+S+O+s-t-L-A+R+Y-B-N-T-@-';
  642. { to every option su,me default value should be given,
  643. otherwise the flag remains uninitialized }
  644. {ERRORS}
  645. screen_errors :=Setop('S');
  646. collect_errors:=Setop('O');
  647. {COMMENTS}
  648. ada_comment :=Setop('D');
  649. c_comment :=Setop('C');
  650. pascal_comment:=Setop('P');
  651. {STRING CONSTANTS}
  652. pascal_string :=Setop('p');
  653. modula_string :=Setop('m');
  654. to_uppercase :=Setop('U');
  655. {COORDINATE}
  656. string_coord :=Setop('s');
  657. token_coord :=Setop('t');
  658. row80_coord :=Setop('R');
  659. byte_coord :=Setop('Y');
  660. mark_byte_coord:=Setop('B');
  661. mark_only_coord:=Setop('N');
  662. {LANGUAGE_SPECIFIC LEXICS}
  663. c_lexics :=Setop('L');
  664. pascal_lexics :=Setop('A');
  665. tex_lexics :=Setop('T');
  666. sty_lexics :=Setop('@');
  667. { INITIALIZES LANGUAGE-SPECIFIC SETTINGS }
  668. SETLEXICS;
  669. { INITIALIZES "SESSION" FLAGS (alf order)}
  670. coord_mark:=0;
  671. in_comment:=false;
  672. in_string :=false;
  673. is_2quote :=false;
  674. last_mark_byte_number:=0;
  675. line_byte_number:=0;
  676. linenumber:=0;
  677. old_line_length :=0;
  678. str_constlen:=0;
  679. tokennumber:=0;
  680. { INITIALIZES PHYSICAL LEVEL READING }
  681. case read_mode of
  682. 1: begin { READ FROM FILE }
  683. if not existfile(filename)then begin Rez:=0;goto 1;end;
  684. {$IFDEF xSUN}
  685. Reset(inpfile,filename);
  686. {$ELSE}
  687. Assign(inpfile,filename);
  688. Reset(inpfile);
  689. {$ENDIF}
  690. readline; { reads first line of file }
  691. end;
  692. 2:
  693. begin { READ FROM LIST }
  694. if strlist=NULL then begin Rez:=0; goto 99; end;
  695. First(strlist,PTR1); { sets global list-pointer PTR1 }
  696. if (PTR1.ptrtype<>ptrlist) or
  697. (PTR1.nel=0) then begin Rez:=0; goto 99; end;
  698. readline; { reads line from current list-pointer position }
  699. end;
  700. 3:
  701. begin { READ FROM MS-DOS RAM MEMORY }
  702. {$IFDEF RAMSCAN}
  703. { Normalizing memory address }
  704. { Is it necessary for Windows model ? }
  705. { segm:=segm+ofs div 16;
  706. ofs:=ofs mod 16; }
  707. ABSPNT:=system.Ptr(segm,ofs); { This place is DOS-dependent !!! }
  708. if char (ABSPNT^) <> new_line_code
  709. then begin Rez:=0; goto 99; end;
  710. ABSPNT:=Pointer(longint(ABSPNT)+1);
  711. readline;
  712. {$ENDIF}
  713. end;
  714. 4: begin
  715. {$IFDEF WIN}
  716. New(SM,Init);
  717. SM^.open(segm);
  718. if (SM^.GETSIZE>128000)or(SM^.GETSIZE<0) THEN
  719. begin
  720. MESSAGEBOX(0,'Wrong handle to LDT stream',nil,16);
  721. rez:=0; goto 99; end
  722. else if (SM^.GETSIZE=0) then
  723. begin
  724. MESSAGEBOX(0,'Empty LDT stream',nil,16);
  725. rez:=0; goto 99; end
  726. else
  727. begin
  728. readline;
  729. end;
  730. {$ENDIF}
  731. end;
  732. end; (* case *)
  733. READ_FILE(REZ); { Main call ... }
  734. erlist_parm:=ERRLIST {global} ;
  735. 1:;99:;
  736. end;{ procedure Scaner }
  737. Procedure SETLEXICS;
  738. {***********************}
  739. var j:integer;
  740. begin
  741. {$IFDEF MIF}
  742. as['<']:=is_control;
  743. as['>']:=is_control;
  744. Exit;
  745. {$ENDIF}
  746. if pascal_lexics then begin { changes in standard, necessary for Pascal}
  747. as['{']:=is_special;
  748. as['#']:=is_special;
  749. as['$']:=is_special;
  750. as['%']:=is_special;
  751. as['''']:=is_special;
  752. as['(']:=is_special;
  753. two_char_symbols[1]:=':=';
  754. two_char_symbols[2]:='<=';
  755. two_char_symbols[3]:='>=';
  756. two_char_symbols[4]:='**';
  757. two_char_symbols[5]:='..';
  758. two_char_symbols[6]:='<>';
  759. two_char_symbols_num:=6;
  760. set_of_second_of_two:=['=','*','.','>'];
  761. for j:=1 to two_char_symbols_num do
  762. as[two_char_symbols[j][1]]:=is_first_of_two;
  763. if modula_string then
  764. begin
  765. as['"']:=is_special;
  766. as['{']:=is_printable;
  767. end
  768. end
  769. else
  770. if c_lexics then begin
  771. as['_']:=is_letter;
  772. as['$']:=is_letter;
  773. two_char_symbols[1]:='->';
  774. two_char_symbols[2]:='++';
  775. two_char_symbols[3]:='--';
  776. two_char_symbols[4]:='>>';
  777. two_char_symbols[5]:='<<';
  778. two_char_symbols[6]:='==';
  779. two_char_symbols[7]:='+=';
  780. two_char_symbols[8]:='*=';
  781. two_char_symbols[9]:='-=';
  782. two_char_symbols[10]:='/=';
  783. two_char_symbols[11]:='%=';
  784. two_char_symbols[12]:='&=';
  785. two_char_symbols[13]:='^=';
  786. two_char_symbols[14]:='|=';
  787. two_char_symbols[15]:='!=';
  788. two_char_symbols_num:=15;
  789. set_of_second_of_two:=['>','+','-','<','='];
  790. for j:=1 to two_char_symbols_num do
  791. as[two_char_symbols[j][1]]:=is_first_of_two;
  792. as['<']:=is_special; { used to process <<= }
  793. as['>']:=is_special; { used to process >>= }
  794. as['/']:=is_special; { used to process /* }
  795. { otherwise isa:=is_first_of_two
  796. is assigned ! }
  797. as['''']:=is_special;
  798. as['"']:=is_special;
  799. end;
  800. if tex_lexics then begin
  801. as['!']:=is_special;
  802. as['"']:=is_special;
  803. as['#']:=is_special;
  804. as['$']:=is_special;
  805. as['%']:=is_special;
  806. as['&']:=is_special;
  807. as['/']:=is_special;
  808. as['{']:=is_special;
  809. as['}']:=is_special;
  810. as['(']:=is_special;
  811. as[')']:=is_special;
  812. as['[']:=is_special;
  813. as[']']:=is_special;
  814. as['=']:=is_special;
  815. as['?']:=is_special;
  816. as['\']:=is_special;
  817. as['`']:=is_special;
  818. as['^']:=is_special;
  819. as['~']:=is_special;
  820. as['*']:=is_special;
  821. as['<']:=is_special;
  822. as['>']:=is_special;
  823. as['|']:=is_special;
  824. as[';']:=is_special;
  825. as[',']:=is_special;
  826. as[':']:=is_special;
  827. as['.']:=is_special;
  828. as['-']:=is_special;
  829. as['_']:=is_special;
  830. as['''']:=is_special;
  831. if sty_lexics {It is the only place where sty_lexics is used}
  832. then as['@']:=is_letter
  833. else as['@']:=is_special;
  834. end;
  835. end;
  836. Procedure StrAdd(c:char); {Adds one character to string constant}
  837. {***********************}
  838. begin
  839. inc{1}(Str_constlen);
  840. if Str_constlen<=80 then Str_const[Str_constlen]:=c;
  841. end;
  842. Procedure StrBegin; {Initiates string constant}
  843. {******************}
  844. begin
  845. saved_coord:=GetCoord;
  846. { It will be used when PutStr works and TOKEN EXITs - in READ_ITEM}
  847. in_string:=true;
  848. Str_constlen:=0;
  849. end;
  850. Procedure Token; {Reads one token from the input stream}
  851. {********************}
  852. label 1,99;
  853. var J,i_saved:integer; { positions }
  854. var digits_before_dot,digits_after_dot:integer;
  855. begin
  856. { at beginning time
  857. "i" - is already set to character in string "s";
  858. array "as" is already initialized;
  859. coordinate mode "coord_mode" is already known;
  860. }
  861. with b123 do begin { variant record ,b1=b2[1]=b3[1]; b2[2]=b3[2] }
  862. 1: { we return to this label if token is not ready still }
  863. b1:=s[i];
  864. isa:=as[b1]; { type of this character }
  865. if isa=is_control
  866. then begin {1}
  867. if b1=new_line_code
  868. then
  869. begin
  870. Readline; { skips to next line, sets new "s" and "i" }
  871. if tex_lexics then begin DT:=dummy; goto 99;end; {NULL is returned at the end of line}
  872. if in_string then begin
  873. {$IFDEF MIF}
  874. writeln('s=[',s,'], constlen=',str_constlen,' i=',i,'b1=',b1);
  875. PutStr(ATOM); goto 99;
  876. {$ELSE}
  877. ER_LEX(11); PutStr(KEYWORD); goto 99;
  878. {$ENDIF}
  879. end;
  880. { ERROR= end of line appears in string constant }
  881. goto 1;
  882. end
  883. else
  884. if b1=endfile_code
  885. then
  886. begin
  887. if in_comment then ER_LEX(12);
  888. { ERROR = end of file appears in comment }
  889. DT:=eof_desk;
  890. goto 99;
  891. end
  892. else
  893. { others are CONTROL characters; }
  894. { they set "DT" field and then form Rigal list/tree structure }
  895. {$IFDEF MIF}
  896. if not in_string then
  897. {$ENDIF}
  898. begin{2}
  899. if in_comment then begin ER_LEX(13); in_comment:=false; end;
  900. { ERROR = control char in comment }
  901. if in_string then begin ER_LEX(14); PutStr(KEYWORD); goto 99;
  902. { will take control character next time }
  903. end;
  904. { ERROR = control char in string }
  905. DT:=Cont_char_to_dt(b1);
  906. I:=I+1;
  907. If (DT=set_coord)and(as[s[i]]=is_digit)
  908. { this control character sets coordinate to value given in input }
  909. then begin
  910. Coord_mark:=TAKE_DIGITS(J);
  911. Last_mark_byte_number:=line_byte_number+i;
  912. I:=I+J;
  913. goto 1; { DOES NOT RETURNS ! }
  914. end;
  915. goto 99;
  916. end; {2}
  917. {NEVER HERE}
  918. end; {1}
  919. { all the following executes AFTER check of is_control }
  920. {b3[1]:=s[i];} { this character }
  921. b3[2]:=s[i+1]; { next }
  922. b3[3]:=s[i+2]; { next }
  923. if in_comment { CHECK FOR END OF COMMENT }
  924. {THIS part never appears in MIF input, since there is no in_comment status}
  925. then begin {1}
  926. { we are in comment;
  927. here only comments that have some special end mark are processed }
  928. if pascal_comment then begin {2}
  929. if b1='}' then begin inc{3}(i); in_comment:=false; goto 1;end;
  930. if b2='*)' then begin inc{4}(i,2); in_comment:=false; goto 1; end;
  931. end {2}
  932. else
  933. if c_comment then begin {2}
  934. if b2='*/' then begin inc{4}(i,2); in_comment:=false; goto 1;end;
  935. end {2}
  936. ;
  937. inc{3}(i);
  938. goto 1;
  939. end;{1}
  940. {$IFDEF MIF}
  941. if in_string then begin{1}
  942. if b1='''' then begin
  943. if str_constlen=81 then begin
  944. {constant ends after special character or in 80 character}
  945. StrBegin;{sets str_constlen to 0}
  946. is_2quote:=false;
  947. end;
  948. PutStr(ATOM);
  949. {jncx}inc(i);
  950. goto 99;
  951. end
  952. {End if MIF constant}
  953. else
  954. {$IFDEF BBS}
  955. { BBS variant of processing:
  956. If \\ appears in MIF format, \ is included output atom;
  957. \> >
  958. \xd2 "
  959. \xd4 $B4
  960. \xd5 '
  961. If \letters
  962. then new IDENT-atom is created. It contains only 'letters', without \
  963. }
  964. if b1='\' then begin
  965. if b2[2]='\' then begin StrAdd(b2[2]);inc{jcnx2}(i,2);goto 1;end;
  966. if b2[2]='>' then begin StrAdd(b2[2]);inc{jcnx2}(i,2);goto 1;end;
  967. if (b2[2]='x')and(b3[3]='d')and(s[i+3]in['2','4','5']) then begin
  968. case s[i+3] of
  969. '2':StrAdd('"');
  970. '4':StrAdd('´');
  971. '5':StrAdd('''');
  972. end;
  973. inc{jcnx2}(i,4);goto 1;end;
  974. (* UNREGISTRED CONTROL SEQUENCE *)
  975. PutStr(ATOM);
  976. inc(i); (* symbol \ is ignored *)
  977. in_string:=true; str_constlen:=82; (* special value, normally impossible *)
  978. goto 99;
  979. end;
  980. {$ELSE}
  981. { LATEX variant of processing:
  982. If any of control characters appears then a separate IDATOM with such contents
  983. is created.
  984. If \x(N)(N)(space) then IDATOM with \x(N)(N) is created;
  985. If \t then IDATOM t
  986. \b b
  987. \n n
  988. \\ \\
  989. \> \>
  990. \(another letter) \(another letter)
  991. Empty MIF strings are coded as NULL
  992. After and before IDENTs additional NULLs can appear sometimes
  993. }
  994. if str_constlen=83 then{ special value for taking one character to next IDATOM }
  995. begin
  996. StrBegin;is_2quote:=false;
  997. StrAdd(b1); PutStr(IDATOM); inc(i);in_string:=true;
  998. str_constlen:=81; { go to next part of the constant} goto 99;end;
  999. if str_constlen=84 then{ special value for taking two characters to next IDATOM }
  1000. begin
  1001. StrBegin;is_2quote:=false;
  1002. StrAdd(b1);StrAdd(b2[2]); PutStr(IDATOM); inc(i,2);in_string:=true;
  1003. str_constlen:=81; { go to next part of the constant} goto 99;end;
  1004. if str_constlen=85 then{ special value for taking 4 characters to next IDATOM,
  1005. and ignoring one more after them }
  1006. begin
  1007. StrBegin;is_2quote:=false;
  1008. StrAdd(b2[1]);StrAdd(b2[2]);StrAdd(b3[3]);StrAdd(s[i+3]); PutStr(IDATOM); inc(i,5);in_string:=true;
  1009. str_constlen:=81; { go to next part of the constant} goto 99;end;
  1010. if b1 in ['§','!','"','@','#','¤','$','%','&','/',
  1011. '{','}','[',']','`','^','~','>','|','<','_','-','*',',']
  1012. (* COMMA (last ,) added 6/6/95 *)
  1013. then begin
  1014. if str_constlen=81 then DT:=dummy else PutStr(ATOM);
  1015. in_string:=true;str_constlen:=83;{ special value for taking one character to next IDATOM }
  1016. goto 99;
  1017. end;
  1018. if b1='\' then begin
  1019. if b2[2]='x' then begin
  1020. if str_constlen=81 then DT:=dummy else PutStr(ATOM);
  1021. in_string:=true;str_constlen:=85;{take 4 chars}goto 99;end;
  1022. if (b2[2]in ['t','b','n']) then begin Inc(i);
  1023. if str_constlen=81 then DT:=dummy else PutStr(ATOM);
  1024. in_string:=true;str_constlen:=83;{take 1 char}goto 99;end;
  1025. if str_constlen=81 then DT:=dummy else PutStr(ATOM);in_string:=true;str_constlen:=84;{take 2 chars}goto 99;
  1026. end;
  1027. {$ENDIF}
  1028. if str_constlen<80 then
  1029. begin StrAdd(b1); {jncx}inc(i); goto 1; end {Normal case}
  1030. else
  1031. if str_constlen=82 then begin
  1032. (* ***
  1033. StrBegin;is_2quote:=false;
  1034. StrAdd(b3[1]);StrAdd(b3[2]);StrAdd(b3[3]);
  1035. PutStr(IDATOM); special 3-character identifier
  1036. *** *)
  1037. J:=TAKE_LETTERS;
  1038. PutIdent(J);
  1039. in_string:=true;
  1040. str_constlen:=81; (* special value, normally impossible;
  1041. used for starting next part of the constant *)
  1042. I:=I+J; (* *** *)
  1043. goto 99;
  1044. end
  1045. else
  1046. if str_constlen=80 then begin PutStr(ATOM);in_string:=true;{jnc}inc(str_constlen);
  1047. {Becomes 81; no shift in input performed, ends part of constant}
  1048. goto 99; end
  1049. else
  1050. if str_constlen=81 then begin
  1051. {immediately afrer previous case; starts next part of constant}
  1052. StrBegin;{sets str_constlen to 0}
  1053. is_2quote:=false;
  1054. {$IFDEF BBS}
  1055. StrAdd(b1);
  1056. {jncx}inc(i);
  1057. {$ELSE}
  1058. {$ENDIF}
  1059. goto 1; end
  1060. end;{1}
  1061. if b1='`' then begin StrBegin;is_2quote:=false;{jncx}inc(i);goto 1;end;
  1062. {Starts new MIF text constant}
  1063. if b1='#' then begin Readline; goto 1; end; {MIF comment}
  1064. {$ELSE}
  1065. if in_string
  1066. { CHECK FOR END OF STRING or SOMETHING SPECIAL IN STRING }
  1067. then begin {1}
  1068. if pascal_string then begin {2}
  1069. if b1='''' then begin {3}
  1070. if b2[2]='''' then begin StrAdd('''');
  1071. inc{4}(i,2);
  1072. goto 1;end
  1073. { this allow to save '''' as '' }
  1074. else begin Putstr(TATOM);inc{3}(i);goto 99;end;
  1075. end {3}
  1076. else begin {3}
  1077. StrAdd(b1);
  1078. inc{3}(i);
  1079. goto 1;
  1080. end {3}
  1081. end; {2}
  1082. if modula_string then begin {2}
  1083. if is_2quote and (b1='"') then
  1084. begin
  1085. if b2[2]='"' then begin StrAdd('"');Inc{4}(i,2);goto 1;end
  1086. else begin Putstr(TATOM);inc{3}(i);goto 99;end;
  1087. end;
  1088. if not(is_2quote) and (b1='''') then
  1089. begin Putstr(KEYWORD);inc{3}(i);goto 99;end
  1090. else begin {3}
  1091. if b1='\' then
  1092. begin
  1093. StrAdd(b1);
  1094. StrAdd(b2[2]);
  1095. { this allows to save \? as ? even if \" appears}
  1096. inc{4}(i,2);
  1097. goto 1;
  1098. end;
  1099. StrAdd(b1);
  1100. inc{3}(i);
  1101. goto 1;
  1102. end; {3}
  1103. end {2}
  1104. end ; {1}
  1105. {$ENDIF}
  1106. { all the following executes AFTER check for in_string & in_comment }
  1107. if isa=is_special then begin{1}
  1108. {THIS part never appears in MIF input, since there is no is_special}
  1109. if pascal_comment then begin{2}
  1110. if (b2='(*')
  1111. then begin in_comment:=true;inc{4}(i,2);goto 1;end;
  1112. if (b1='{')and(b2[2]<>'$')
  1113. then begin in_comment:=true;inc{3}(i) ;goto 1;end;
  1114. end{2}
  1115. else
  1116. if c_comment then begin{2}
  1117. if b2='/*' then begin in_comment:=true;inc{4}(i,2);goto 1;end;
  1118. end {2}
  1119. else
  1120. if ada_comment then begin {2}
  1121. if b2='--' then begin Readline;goto 1; end;
  1122. end {2}
  1123. ;
  1124. if pascal_string then begin {2}
  1125. if b1='''' then begin StrBegin;is_2quote:=false;inc{3}(i);goto 1;end;
  1126. end {2}
  1127. else
  1128. if modula_string then begin {2}
  1129. if b1='''' then begin StrBegin;is_2quote:=false;inc{3}(i);goto 1;end;
  1130. if b1='"' then begin StrBegin;is_2quote:=true;inc{3}(i);goto 1;end;
  1131. end {2}
  1132. ;
  1133. if pascal_lexics then begin {2}
  1134. { SPECIALLY TAKES TURBO PASCAL DIRECTIVE-COMMENTS }
  1135. if (b2='{$') then begin {3}
  1136. j:=0;
  1137. repeat inc{3}(j);
  1138. until (s[i+j]='}')or(as[s[i+j]]=is_control);
  1139. if s[i+j]='}' then inc{3}(j);
  1140. PutAtom(j);
  1141. inc{4}(i,j);
  1142. goto 99;
  1143. end; {3}
  1144. { ADDITIONAL SYMBOLS }
  1145. if (b1='#')or(b1='$')or(b1='%')
  1146. then begin {3}
  1147. inc{3}(i);
  1148. J:=TAKE_LETTERS; { starts from i-th position }
  1149. i:=i-1;
  1150. PutAtom(J+1); { takes token from i-th position }
  1151. inc{4}(i,j+1);
  1152. goto 99;
  1153. end; {3}
  1154. end{2}
  1155. else
  1156. if c_lexics then begin {2} { SYMBOLS= > < }
  1157. { SPECIAL CASES FOR C LANGUAGE }
  1158. if (b3='<<=')or(b3='>>=') then begin
  1159. PutAtom(3); inc{4}(i,3); goto 99;
  1160. end;
  1161. isa:=is_first_of_two;
  1162. { !! in C case <<, >>, >=, <= will be tested further
  1163. in TWO_char_symbols section, hence we go to there
  1164. using assignment to "ISA" }
  1165. end {2}
  1166. end; {1}
  1167. if tex_lexics then begin{1}
  1168. (********* DESCRIPTION OF TeX Lexics *********
  1169. End_of_Line,(but not End_of_file) are NULL atom
  1170. \letters is command , where letters are A-Z,a-z.
  1171. @ is added to letters if we are in sty_lexics submode
  1172. Recognizable by #IDENT rule
  1173. \? is command , where ? is arbitrary.
  1174. Recognizable by #_KEYWORD rule
  1175. ? is command , where ? is one of special characters,
  1176. (all visible except letters and digits)
  1177. Recognizable by #_KEYWORD rule
  1178. ' ' is command. Every space-separately.
  1179. Recognizable as #ATOMs, not #IDATOM, not #_KEYWORD
  1180. letters,digits,spaces that can form unlimited
  1181. sequences.
  1182. Recognizable as #ATOMs, not #IDATOM, not #_KEYWORD
  1183. **********************************************)
  1184. (* My comments: End_of_File is not NULL atom,
  1185. because it is unclear how to stop main loop then.
  1186. #XXX is not accepted as camment bacause of \verbatim
  1187. environment *)
  1188. if b1='\' then begin {2}
  1189. if as[b2[2]]=is_letter then begin
  1190. { Command like \def, or \d@f \@ in sty_lexics }
  1191. j:=1;
  1192. while as[s[i+j]]=is_letter do inc(j);
  1193. { Non-loop condition: s[last_element]=' ', it is not a is_letter
  1194. character }
  1195. PutIT(IDATOM,j);
  1196. inc{4}(i,j);
  1197. goto 99;
  1198. end
  1199. else
  1200. if b2[2]=new_line_code then
  1201. (* THIS ADDITIONAL IF IS ADDED 4/5/1995 (!) *)
  1202. begin
  1203. PutIt(KEYWORD,1);
  1204. inc{4}(i,1);
  1205. goto 99;
  1206. end
  1207. else
  1208. begin
  1209. {Commands like \% \$ and so on; \@ if not sty_lexics }
  1210. PutIt(KEYWORD,2);
  1211. inc{4}(i,2);
  1212. goto 99;
  1213. end;
  1214. end;{2}
  1215. {My comment: \ is included in is_special, but it makes
  1216. no any difference}
  1217. if (isa=is_special) then begin PutIt(KEYWORD,1);inc{3}(i);goto 99;end;
  1218. {Separate character like $ ( ) etc. }
  1219. if b1=' ' then begin PutIt(ATOM ,1);inc{3}(i);goto 99;end;
  1220. {All other characters}
  1221. {all other characters simply form 80-byte atoms;
  1222. end of line and end of file cannot be included to it;
  1223. Tabulators are changed to spaces.}
  1224. j:=0;
  1225. while (as[s[i+j]]<>is_special)and(j<80)and
  1226. (s[i+j]<>new_line_code)and(s[i+j]<>endfile_code)
  1227. do begin
  1228. if s[i+j]=#9 then s[i+j]:=' ';
  1229. inc(j);
  1230. end;
  1231. PutIt(Atom,j);
  1232. inc{4}(i,j);
  1233. goto 99;
  1234. end;{1}
  1235. if isa=is_space then begin inc{3}(i); goto 1;end;
  1236. if isa=is_letter then begin
  1237. J:=TAKE_LETTERS;
  1238. PutIdent(J);
  1239. I:=I+J;
  1240. goto 99;
  1241. end;
  1242. if isa=is_digit then begin {1}
  1243. if c_lexics then begin {2}
  1244. { ADDITIONAL SYMBOLS; HERE hex and octal numbers are saved as
  1245. normal ATOMs }
  1246. if (b1='0')
  1247. then begin {3}
  1248. inc{3}(i);
  1249. J:=TAKE_LETTERS; { starts from i-th position }
  1250. i:=i-1;
  1251. PutAtom(J+1); { takes token from i-th position }
  1252. inc{4}(i,j+1);
  1253. goto 99;
  1254. end; {3}
  1255. end; {2}
  1256. i_saved:=i; { remember starting position }
  1257. AADR:=TAKE_DIGITS(J);
  1258. digits_before_dot:=j;
  1259. if (j<10)and
  1260. ( ((s[i+j]='.')and(s[i+j+1]='.'))
  1261. or
  1262. (not(s[i+j]in['e','E','.'])) )
  1263. then begin PutNumber;inc{4}(i,j);goto 99;end;
  1264. inc{4}(i,j);
  1265. if s[i]='.' then begin
  1266. inc{3}(i);
  1267. AADR:=TAKE_DIGITS(J);
  1268. digits_after_dot:=j;
  1269. inc{4}(i,j);
  1270. end;
  1271. if s[i] in ['e','E'] then
  1272. begin
  1273. digits_before_dot:=0;
  1274. digits_after_dot:=0;
  1275. inc{3}(i);
  1276. if s[i] in ['+','-'] then inc{3}(i);
  1277. AADR:=TAKE_DIGITS(J);
  1278. inc{4}(i,j);
  1279. end;
  1280. J:=i-i_saved;
  1281. i:=i_saved; { to set "i" to starting position }
  1282. PutFloat(J,digits_before_dot,digits_after_dot);
  1283. inc{4}(i,j);
  1284. goto 99;
  1285. end;{1}
  1286. if isa=is_first_of_two then begin {1}
  1287. if b2[2] in set_of_second_of_two then
  1288. for j:=1 to two_char_symbols_num do
  1289. if b2=two_char_symbols[j] then begin
  1290. PutAtom(2);I:=I+2;goto 99;
  1291. end
  1292. end;{1}
  1293. PutAtom(1);inc{3}(i);
  1294. end; { with}
  1295. 99:;
  1296. end; { Procedure }
  1297. Function TAKE_LETTERS:integer;
  1298. {**************************************}
  1299. label 99;
  1300. { Reads only letters, digits and underscores.
  1301. returns number of characters read }
  1302. var JJ:integer; c:char;
  1303. begin
  1304. JJ:=0;
  1305. while true do begin
  1306. c:=s[i+jj];
  1307. isa:=as[c];
  1308. if (isa=is_letter)or(isa=is_digit)or(isa=is_underscore)
  1309. then begin
  1310. if to_uppercase then s[i+jj]:=upcase_tab[c];
  1311. inc{3}(jj)
  1312. end
  1313. else begin
  1314. TAKE_LETTERS:=jj;goto 99;
  1315. end;
  1316. end;
  1317. 99:;
  1318. end;
  1319. Function TAKE_DIGITS(var jj:integer):longint;
  1320. {************************************************}
  1321. label 99;
  1322. var summator:longint; c:char;
  1323. begin
  1324. JJ:=0; summator:=0;
  1325. while true do begin
  1326. c:=s[i+jj];
  1327. if as[c]=is_digit then
  1328. begin summator:=summator*10+ord(c)-ord('0');inc{3}(jj);end
  1329. else begin TAKE_DIGITS:=summator;goto 99;end;
  1330. end;
  1331. 99:;
  1332. end;
  1333. (*begin*)
  1334. { PRESS F8 when debugging ! }
  1335. (*INITIALIZE_SCAN_VARIABLES;*)