langdesc.txt 65 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421
  1. .LP
  2. .ds RF Mikhail Auguston, University of Latvia
  3. .DA
  4. .AI
  5. University of Latvia
  6. Institute of Mathematics and Computer Science
  7. .TL
  8. RIGAL PROGRAMMING SYSTEM
  9. LANGUAGE DESCRIPTION
  10. .AU
  11. by MIKHAIL AUGUSTON
  12. .nr PS 9
  13. .nr VS 12
  14. .SH
  15. TABLE OF CONTENTS
  16. .LP
  17. ABSTRACT
  18. 1. Introduction
  19. 2. Implementation
  20. 3. Lexical Rules
  21. 4. Data
  22. 4.1 Atoms
  23. 4.2 Variables
  24. 4.3 Lists
  25. 4.4 Trees
  26. 5. Expressions
  27. 5.1 Accumulative Assignment Statements
  28. 5.2 Semantics of Variables
  29. 6. Rules
  30. 6.1. Simple Patterns
  31. 6.2. Assignment in Patterns
  32. 6.3 Rule Branches. Onfail Operations
  33. 6.4 Special Variables
  34. 7. Compound Patterns
  35. 7.1 List Pattern
  36. 7.2 Iterative Pattern of Sequence
  37. 7.3 Patterns for Alternative and Option
  38. 7.4 Tree Pattern
  39. 7.5 Iterative Tree Pattern
  40. 7.6 Names of Lists and Trees in Patterns
  41. 7.7 Patterns of Logical Condition Testing
  42. 8. Statements
  43. 8.1 Assignment Statement
  44. 8.2 Conditional Statement
  45. 8.3 FAIL Statement
  46. 8.4 Loop Statements
  47. 8.5 Rule Call
  48. 9. Input and Output
  49. 9.1 SAVE and LOAD Statements
  50. 9.2 Text Output
  51. 10. Program Structure
  52. 10.1 Local Variables
  53. 10.2 References to Variables of Other Rules
  54. 10.3 Attribute Grammars and RIGAL. Global Attributes
  55. 11. Built-in Rules
  56. 12. Sample Compiler
  57. 12.1 TOYLAN Language
  58. 12.2 Intermediate Form of Program.
  59. Abstract syntax tree
  60. 12.3 Target Language BAL
  61. 12.4 Main Module of Compiler
  62. 12.5 Parsing Phase
  63. 12.6 Code Generation Phase
  64. 13. Conclusions and Future Work
  65. References
  66. ABSTRACT
  67. A new programming language for compiler writing is described.
  68. The main data structures are atoms, lists and trees. The control
  69. structures are based on advanced pattern matching. All phases of
  70. compilation, including parsing, optimization and code generation,
  71. can be programmed in this language in short and readable form.
  72. Sample compiler written in RIGAL is presented.
  73. 1. Introduction
  74. Programming language RIGAL is intended to be a tool for
  75. parsing( context checking, diagnosing and neutralization of
  76. errors included ), for code optimization, code generation, static
  77. analysis of programs, as well as for the programming of
  78. preprocessors and convertors.
  79. Almost all the systems envisaged to solve compiler
  80. construction problems contain means to describe context-free
  81. grammar of the source language. Earlier systems, like the Floyd -
  82. Evans language, [1] present tools to work with stack, which is
  83. used for parsing. Parsing methods for limited grammar classes are
  84. implemented in the languages and systems of later generations
  85. (usually LL(1) or LR(1) ).
  86. Such systems as YACC (Johnson [2]), CDL-2 ( Koster [3]), SHAG
  87. (Agamirzyan [4]) and many others make use of synchronous
  88. implementation of parsing and different computations, e.g.,
  89. formation of tables, context checking, etc. Usually these actions
  90. are performed by call of semantic subroutines, written in some
  91. universal programming language ( e.g., in Pascal or C).
  92. Attribute grammars advanced by Knuth [5] have greatly
  93. influenced development of systems for compiler construction.
  94. Systems, like, SUPER (Serebryakov [6]), ELMA (Vooglaid, Lepp,
  95. Lijb [7]), MUG2 (Wilhelm [9]) are based on the use of attribute
  96. grammars not only for parsing, but for code generation as well.
  97. Pattern matching is a convenient tool for programming of
  98. parsing, optimization and code generation. The REFAL programming
  99. language [10], acknowledged for translator writing, may serve as
  100. a good example.
  101. Vienna method for defining semantics of programming languages
  102. [11] suggests the usage of labelled trees in order to present the
  103. abstract syntax of programs. Representation of compilation
  104. intermediate results in the tree form has become usual (see
  105. [12]).
  106. Dependence of control structures in the program from data
  107. structures used for program's work is one of the basic principles
  108. in programming. The recursive descent method could be considered
  109. to be the application of dependence principle.
  110. The above mentioned ideas and methods were taken into account
  111. when creating RIGAL language.
  112. The language possesses few basic notions. Data structures
  113. contain atoms, lists and trees. Advanced mechanism of pattern
  114. matching lies at the basis of control structures.
  115. The fact that RIGAL is a closed language makes RIGAL
  116. distinctive. That means that almost all the necessary
  117. computations and input-output could be executed by internal means
  118. and there is no need to use external semantic subroutines.
  119. Therefore the portability of RIGAL programs to other computers is
  120. increased.
  121. Means for work with trees, different patterns including,
  122. enable both programming of parsing algorithms and optimization
  123. phases and code generation as well. The language supports design
  124. of multipass translators. Trees are used as intermediate data.
  125. The language allows to split the program into small modules
  126. (rules) and presents various means to arrange interaction of
  127. these modules. Pattern matching is used for parameter passing.
  128. RIGAL supports attribute translation scheme and easy
  129. implementation of synthesized and inherited attributes is
  130. possible. The problem of global attributes is solved by usage of
  131. special references.
  132. Lexical analysis is a separate task and requires special
  133. language facilities for description as it is, for example, in
  134. LEX/YACC [2] system. In the current implementation of RIGAL two
  135. scanners are included that accept lexics of Pascal, C,
  136. Modula-2 and RIGAL.
  137. 2. Implementation
  138. RIGAL was designed and implemented in the Computing Center of
  139. Latvia University in years 1987-1988. The first implementation
  140. was for PDP-11 in RSX-11.
  141. At the present stage RIGAL interpreter has been developed and
  142. optimizing compiler RIGAL -> Pascal has been implemented by means
  143. of RIGAL itself. The interpreter and the compiler have been
  144. ported to VAX/VMS , IBM PC XT/AT (MS-DOS and MS-Windows) and
  145. Unix/SUN environments.
  146. 3. Lexical Rules
  147. The text of RIGAL program is a sequence of tokens - atoms
  148. (e.g., identifiers and integers ), keywords (e.g., if, RETURN ),
  149. special symbols (e.g., +, ## ), names of variables and rules
  150. (e.g., $A, #L ). Tokens may be surrounded by any number of
  151. blanks. A comment is any string of symbols that begins with two
  152. consecutive symbols '-' (minus ). The end of the comment is the
  153. end of the line. For example,
  154. #Sum -- rule for addition of two numbers
  155. $N1 -- the first number
  156. $N2 -- the second number
  157. / RETURN $N1 + $N2 / -- return of the result
  158. ##
  159. 4. Data
  160. 4.1 Atoms
  161. An atom is a string of symbols. If the atom is an identifier
  162. ( the first symbol is a letter followed by letters or digits or
  163. underscore symbols), in the text of RIGAL program it could be
  164. written directly: AABC total_number x25
  165. Numerical atoms are integers, for instance, 2, 187, 0, -25
  166. In other cases the atom is quoted: '+' ':=' '1st'
  167. Some identifiers are reserved as keywords in RIGAL. If they
  168. are used as RIGAL atoms, they should be quoted. For example,
  169. 'IF', 'RETURN'. Besides, any atom, which is an identifier, also
  170. can be quoted - ABC and 'ABC' represent one and the same atom.
  171. It should be noted that 25 and '25' are different atoms, the
  172. latter is just a string of symbols '2' and '5'.
  173. Two special atoms are distinguished in the language.
  174. NULL - this atom is frequently yielded as a result of
  175. different operations, if something was incorrect in the process
  176. of computations. This atom also represents an empty list, an
  177. empty tree and Boolean value "false".
  178. T - usually this atom is yielded by logical operations and
  179. represents value "true".
  180. 4.2 Variables
  181. The name of a variable must begin with the symbol $, followed
  182. by an identifier. Value can be assigned to a variable, for
  183. example, by the help of assignment statement: $E := A
  184. In this case the atom A becomes value of the variable $E.
  185. In RIGAL variables have no types, the same variable may have
  186. an atom, a list or a tree as a value in different time moments.
  187. 4.3 Lists
  188. Ordered sequences, i.e., lists can be composed from atoms and
  189. from other lists and trees, as well. A special function - list
  190. constructor serves for list formation. For instance, (. A B C .)
  191. forms a list of three atoms A, B and C.
  192. Arguments of the list constructor may be expressions. The
  193. sample $E := (. (. 8 14 7 .) (. A B .) .) could be rewritten as
  194. follows:
  195. $A := (. 8 14 7 .); $B := (. A B .); $E := (. $A $B .);
  196. Separate elements of the list can be selected by indexing.
  197. Hence, $B [1] is atom A, $A [2] is atom 14, $E [2] is list
  198. (. A B .), but $E [10] is atom NULL
  199. If the value of the index is a negative number, for instance
  200. -N, then the N-th element, beginning from the end of the list, is
  201. selected. For example, $A [-1] is atom 7.
  202. The necessity to add one more element to the list is quite
  203. common. Operation !. is envisaged for this purpose.
  204. Example. (. A B .) !. C yields the list (. A B C .)
  205. To link two lists in a new list the operation !! is applied (
  206. list concatenation). For instance, (. A B .) !! (. C D .) yields
  207. (. A B C D .).
  208. 4.4 Trees
  209. Tree constructor is used to create a tree. For example,
  210. <. A : B, C : D .>
  211. One can imagine tree as a graph, the nodes and arches of
  212. which are marked by some objects.
  213. Objects before ':' in the tree constructor are named
  214. selectors. In the given implementation solely atoms, which are
  215. identifiers ( except NULL ), may serve as selectors. In the
  216. graphical representation selectors correspond to arches of the
  217. graph. All selectors of one and the same level in the tree must
  218. be different.
  219. Any object - atom, list or tree, except atom NULL, may
  220. correspond to terminal nodes of the graph ( "leaves" of the
  221. tree). Hence, multilayer trees can be built. For instance,
  222. <. A : B, C : <. D : E, F : G .> .>
  223. Pair "selector : object" in the tree is named branch of the
  224. tree. Branches are unordered in the tree.
  225. Likewise for the list constructor, the tree constructor may
  226. be described by expressions ( in both selector and object
  227. places), for instance,
  228. $X := D; $B := (. 2 8 .);
  229. $C := <. A : <. M : K .>, $X : '+' , E : $B .>;
  230. Select operation serves to extract the tree component. It is
  231. in the following form: tree . sel , where tree is the expression,
  232. whose value must be some tree, but sel is the expression, whose
  233. value must be an atom-identifier.
  234. Consequently,
  235. $C . A is the tree <. M : K .> ,
  236. $C . D is the
  237. atom '+' ,
  238. $C . E is the list (. 2 8 .) ,
  239. $C . E[2] is the atom 8
  240. , $C . A . M is the atom K
  241. If there is no branch with a given selector in the tree, then
  242. the result is NULL: $C . W is atom NULL.
  243. Operation of the tree "addition" is performed as well: T1 ++
  244. T2 , where T1 and T2 are trees. Tree T2 branches are added to the
  245. tree T1 one by one. If in the tree T1 there already exists a
  246. branch with the same selector, the branch is substituted by a new
  247. one. Therefore, the operation "++" is not commutative.
  248. It should be pointed out that the tree constructor is
  249. computed from left to right, i.e.,
  250. <. s1 : a1, s2 : a2, s3 : a3.>
  251. gives the same result as the expression
  252. (( NULL ++ <. s1 : a1 .> ) ++ <. s2 : a2 .>) ++ <. s3 : a3 .>
  253. 5. Expressions
  254. Operations = and <> serve for the comparison of objects. The
  255. result of the comparison is either T ("true") or NULL ("false").
  256. Atoms are matched directly, for instance, a = b gives NULL,
  257. 25 = 25 gives T, 17 <> 25 gives T.
  258. Lists are considered equal iff they contain equal number of
  259. components and if these components are equal respectively.
  260. Trees are considered equal iff they contain equal number of
  261. branches and if one of the trees contains the branch "S : OB",
  262. then the other tree also contains the branch "S : OB1" and OB =
  263. OB1.
  264. Arithmetical operations +, -, *, DIV, MOD are assigned for
  265. numerical atoms. The essence of these operations is similar to
  266. those in Pascal. The result of an arithmetical operation is a
  267. numerical atom. Atom NULL is also admitted as the argument of
  268. arithmetical operation, in this case integer 0 is supposed to be
  269. its value. Under matching these atoms are considered different,
  270. i.e., NULL = 0 gives NULL.
  271. Besides the operations = and <> numerical values could be
  272. compared by the help of >, <, >= and <=.
  273. Logical operations AND, OR and NOT usually are applied under
  274. conditions in conditional statements. Their arguments may be
  275. arbitrary objects. If the object differs from NULL, it is
  276. supposed to have the value "true" in a logical operation. Atom
  277. NULL represents the "false" value. The result of a logical
  278. operation always is either T or NULL.
  279. In order to make complex hierarchic objects ( trees and lists
  280. of complex structure ) more visual and to improve the work of
  281. pattern matching, trees and lists may be labelled. A name is an
  282. atom opposed to the root node of the tree or the whole list.
  283. Labelling operation is written the following way:
  284. A :: OB
  285. where A is an atom , OB is
  286. an object (a tree or a list). For example,
  287. Add_op :: <. arg1 : 5, arg2 : 4 .>
  288. The execution order of operations in the expression is
  289. controlled by parentheses "(" and ")". Priority is assigned to
  290. every operation, and, if the execution order is not defined by
  291. parentheses, operations are executed according to their
  292. priorities - beginning with the higher and proceeding to the
  293. lower.
  294. Operations are listed in decreasing order of priorities (some
  295. operations will be discussed later).
  296. 1) Rule call, list constructor, tree constructor, last
  297. 2) Selector ".", index "[]", ::
  298. 3) NOT, unary -
  299. 4) *, DIV, MOD
  300. 5) !. , !! , ++ , +, binary -
  301. 6) = , <> , > , < , >= , <=
  302. 7) AND
  303. 8) OR
  304. Binary operations of the same priority are executed from left
  305. to right, while unary operations from right to left.
  306. 5.1 Accumulative Assignment Statements
  307. It is quite often that working with a list or a tree elements
  308. are added step by step, thus the growing object is retained in
  309. the same variable. Therefore short form of assignment statement
  310. has been introduced. For the operations !. , !! , ++ and +
  311. statements of the form $X := $X op Expr can be written as
  312. $X op := Expr .
  313. 5.2 Semantics of Variables
  314. In the implementation of RIGAL every object - atom, list or
  315. tree has a descriptor. It is a special data structure that
  316. contains some information about this object: the value of atom,
  317. the number of elements and pointers to elements for lists and
  318. trees. Variables have pointers to object descriptors as values.
  319. Statement $X := OB assigns to the variable $X a pointer to
  320. the descriptor of object OB. After the execution of the statement
  321. $Y := $X both variables $X and $Y contain pointers to the same
  322. object OB.
  323. Operations !.:= , !!:= , +:= and ++:= change the descriptor
  324. of their first argument, i.e., have a side effect. Sometimes it
  325. can be undesirable. For example,
  326. $A := (. 3 17 .); $B := $A; $B !.:= 25;
  327. The value of $B becomes list (. 3 17 25 .), but operation
  328. !.:= has added element 25 immediately to the list (. 3 17 .), and
  329. the descriptor of this list is changed. As the value of the
  330. variable $A was a pointer to this descriptor, then after the
  331. execution of the statement $B !.:= 25 the value of the variable
  332. $A is changed, too.
  333. To prevent this, we must have a copy of the object, to which
  334. $A refers to before assigning it to $B. Operation COPY( OB )
  335. is used for this purpose. It makes a copy of the descriptor of
  336. the object OB.
  337. Now we can write a "safe" call of the operation !.:= in such
  338. a way: $A := (. 3 17 .); $B := COPY( $A); $B !.:= 25;
  339. As a result $B takes the value (. 3 17 25 .), but $A remains
  340. equal to (. 3 17 .). The same effect can be obtained after
  341. execution of statements $A := (. 3 17 .); $B := $A !. 25;
  342. 6. Rules
  343. The concept of rule is analogous to concepts of procedure and
  344. function in conventional languages, such as Pascal or C.
  345. First of all, by the help of a rule we can check, whether an
  346. object or a sequence of objects complies with some grammar. For
  347. this purpose rule has a pattern. Objects, assigned under rule
  348. call (arguments of the rule), are matched with the pattern. If
  349. there is a necessity, some operations could be executed, for
  350. instance, computations and input-output operations,
  351. simultaneously with rule arguments and pattern matching.
  352. Rule that is called can compute and return back some value
  353. (object), i.e., it can be used as function.
  354. Depending on the result of rule arguments and pattern
  355. matching, the rule call ends with success or failure. Thus the
  356. rule call could be used in another rule patterns.
  357. 6.1. Simple Patterns
  358. Definition of the rule begins with the indication of the name
  359. of the rule in the form of #LLL , where LLL is an
  360. atom-identifier. In the most common case the pattern is indicated
  361. after the name of the rule. Rule definition ends with symbol
  362. '##'. For instance,
  363. #L1 A B ##
  364. In this case the pattern consists of atoms A and B. Call of
  365. the rule #L1 takes place, for instance, the following way :
  366. #L1 ( A B )
  367. The sequence of objects - atoms A and B, is assigned as rule
  368. arguments.
  369. After rule call the argument and pattern matching begins. The
  370. first argument - atom A is matched with the first pattern, which
  371. is also an atom. These atoms are equal, so their matching is
  372. successful. After that the next object from the sequence of
  373. arguments - atom B and the next pattern, also atom B, are
  374. matched. Their matching is also successful, therefore, the call
  375. of the rule #L1 is successful, too.
  376. #L1( A C ) call fails, because the second argument - atom C
  377. was not successfully matched with the pattern - atom B.
  378. #L1( A ) call fails, as there is no object for the second
  379. pattern with which it could be matched successfully.
  380. But #L1( A B C) call is successful, as the third rule
  381. argument - atom C was not demanded by any pattern and matching of
  382. the first two rule arguments was successful.
  383. Arbitrary atoms, both non-numerical and numerical, may be
  384. described as patterns.
  385. Such operations as assignment statements, input - output and
  386. others may be indicated before the pattern, after it and between
  387. patterns. These statements are quoted in the pair of symbols '/'
  388. . If there is a necessity to write several statements within the
  389. pair '/' , these statements are separated by the symbol ';' .
  390. A group of statements is executed in the rule pattern, if
  391. matching of the previous pattern with the corresponding rule
  392. arguments was successful.
  393. The value returned by the rule is worked out by statement
  394. RETURN . It has the following form: RETURN expression.
  395. Simultaneously this statement completes the execution of the
  396. rule.
  397. Example.
  398. #L2 'begin' 'end' / RETURN 'The pair begin-end' / ##
  399. Rule call is illustrated in the following example.
  400. $A := #L2 ( 'begin' 'end' );
  401. As a result the atom
  402. 'Thetpairtbegin-end' is assigned to the variable $A.
  403. If RETURN statement is not described in the rule, then the
  404. NULL value is returned.
  405. If the rule call ends in failure, then usually value NULL is
  406. returned, although in case of failure, it is possible to work out
  407. the returned value, which is not NULL; for this purpose statement
  408. RETURN must be used in onfail-operations (see sect.6.3.).
  409. Variable could be used as pattern. It is matched with one
  410. object (atom, list or tree) from the sequence of rule arguments.
  411. Matching always ends in success, and as a side effect the
  412. variable obtains this object as value. For example,
  413. #L3 $A $B /RETURN (. $B $A .)/ ##
  414. After the call $X := #L3 ( 1 2) the variable $X obtains as
  415. value (. 2 1 .)
  416. The rule pattern, in its turn, may refer to some rule
  417. (recursively, as well). Then the subsequence of the calling rule
  418. arguments become arguments of rule - pattern. If the call of the
  419. rule - pattern is successful, then matching of further patterns
  420. of the calling rule with the remaining arguments proceeds. If the
  421. call of the rule - pattern fails, then the pattern matching of
  422. the calling rule fails, too. For example,
  423. #L4 A #L5 D ##
  424. #L5 B C ##
  425. Then the call #L4( A B C D) is successful, but the call
  426. #L4( A E F D) is unsuccessful.
  427. There is a number of built-in rules in the language (see
  428. section 11.). For instance, #NUMBER is successfully matched with
  429. a numerical atom and returns it as a value, other arguments fail
  430. and return NULL. #IDENT is successfully matched with atom -
  431. identifier and returns it as value.
  432. 6.2. Assignment in Patterns
  433. Every pattern, when successfully matched with the
  434. corresponding rule argument, returns some value. The value of
  435. atom pattern coincides with this atom, the value of variable
  436. pattern coincides with the value obtained by this variable as a
  437. result of matching with the arguments. The value of rule pattern
  438. is defined by statement RETURN in this rule.
  439. If the matching ends in failure, the pattern usually returns
  440. value NULL.
  441. These values, returned by patterns, can be assigned at once
  442. to some variable. It is enough to write the name of the variable
  443. and the assignment symbol ':=' before the pattern element.
  444. Example.
  445. #L6 $A $R := #L7 / RETURN (. $A .) !! $R / ##
  446. #L7 $B $C / RETURN (. $B $C .) / ##
  447. After execution of the statement $X := #L6 ( 1 2 3) the value
  448. of $X will be (. 1 2 3 .)
  449. Symbols of accumulative assignment '!.:=' , '!!:=' , '++:='
  450. and '+:=' can be used instead of ':=' in patterns .
  451. Therefore, we can rewrite the previous example the following
  452. way:
  453. #L6_1 $R !.:= $A $R !!:= #L7_1 / RETURN $R / ##
  454. #L7_1 $M !.:= $B $M !.:= $C / RETURN $M / ##
  455. It should be noted that all variables in the rule are
  456. initialized by value NULL, so the value of the expression NULL !.
  457. $A that equals to (. $A .) is assigned to the variable $R by the
  458. first application of the pattern $R !.:= $A in #L6_1.
  459. Patterns of the type $N := #NUMBER or $ID := #IDENT are used
  460. very often, therefore, following defaults are introduced in the
  461. language. If the first letter of the variable name is N, then
  462. this variable, having been used as pattern element, will have a
  463. successful matching only with a numerical atom, in other cases
  464. matching ends in failure, and variable obtains value NULL. If the
  465. first letter of the name of the variable is I, this variable is
  466. matched successfully only with an atom-identifier.
  467. 6.3 Rule Branches. Onfail Operations
  468. Several groups of patterns may be united in one rule.
  469. The first group of patterns is applied to the rule arguments
  470. first. If matching of this group of patterns with the arguments
  471. is successful, the rule call is successful. But if matching has
  472. failed, transition to the next group of patterns takes place, and
  473. it is matched with the same arguments. It goes on until some
  474. group of rule patterns is matched with the arguments
  475. successfully. If not a single pattern group matches successfully
  476. with rule arguments, the rule call ends in a failure.
  477. Such alternative pattern groups are called rule branches,
  478. and, when writing the rule, they are separated by the symbol
  479. ';;'.
  480. If the branch fails, the execution of its patterns and
  481. statements is abandoned at the place, where branch pattern
  482. failed, and control is transferred to the next branch (if there
  483. is one) or the whole rule fails (if the current branch is the
  484. last one in the rule). Still there is a possibility to execute
  485. some operations before exit from the branch.
  486. ONFAIL operation is a sequence of statements, written at the
  487. end of the branch and delimited from patterns and branch
  488. statements by keyword ONFAIL.
  489. If ONFAIL-statements are described in the branch, then in
  490. case of branch failure, control is transferred to them and
  491. statements, given in ONFAIL-unit , are executed. So, in case of
  492. branch failure, causes of failure can be analyzed and message can
  493. be output. Statement RETURN can be executed in ONFAIL operations,
  494. as well. Then exit from the rule takes place (with failure), and
  495. some other value than NULL can be returned.
  496. 6.4 Special Variables
  497. Special variable without name $ denotes the first rule
  498. argument matched by a rule pattern.
  499. The value of special variable $$ equals to the value of
  500. current rule argument, to which current rule pattern is applied.
  501. 7. Compound Patterns
  502. Lists, sequences of elements in lists and trees can be
  503. analyzed by patterns. Nesting of patterns practically is
  504. unlimited. It is allowed to insert statements before, after and
  505. within any pattern.
  506. 7.1 List Pattern
  507. List pattern is written in the rule the following way:
  508. (. S1 S2 ... SN .)
  509. where S1, S2, ..., SN are patterns . For instance,
  510. #L8 (. $E1 $E2 .) ##
  511. Pattern of the rule #L8 is matched successfully with any
  512. list, containing precisely two elements. Such call is successful:
  513. #L8( (. (. 1 2 .) <. A : B .> .) )
  514. But the following calls end in failure.
  515. #L8( A B ) - because
  516. pattern will be applied to the first argument, i.e., to atom A,
  517. #L8( (. 13 .) ) - because the argument is one element list.
  518. In case of success the list pattern yields value that can be
  519. assigned to some variable. This value coincides with the whole
  520. list, to which the pattern was applied.
  521. 7.2 Iterative Pattern of Sequence
  522. In RIGAL the following pattern is defined for sequence
  523. recognition: (* S1 S2 ... SN*) , where S1 , S2, ... SN are some
  524. patterns. This pattern describes the repetition of enclosed
  525. sequence of patterns zero or several times.
  526. Rules with a variable number of arguments can be defined by
  527. iterative pattern of sequence. For example,
  528. #Sum (* $S +:= $N *) / RETURN $S / ##
  529. This rule is used for summing up any amount of numbers.
  530. #Sum( 2 5 11) = 18 and #Sum( 3 ) = 3
  531. Iterative pattern is very often used within list pattern. For
  532. instance, the following rule counts the number of list elements.
  533. #Length (. / $L := 0 / (* $E / $L +:= 1 / *) .) / RETURN $L / ##
  534. Samples of rule call.
  535. #Length( (. A B C .) ) = 3 and #Length( NULL ) = 0
  536. Iterative pattern (+ S1 S2 ... SN +) is analogous to the
  537. pattern (* ... *), but assigns the repetition of enclosed pattern
  538. sequence one or several times.
  539. In the iterative pattern of sequence the element delimiter is
  540. indicated in the form of (* S1 S2 ... SN * Delimiter ) or (+ S1
  541. S2 ... SN + Delimiter +)
  542. Atom or name of the rule may serve as Delimiter .
  543. Example. Analysis of a simple Algol-like declaration. A
  544. fragment of variable table coded in a tree form is returned as a
  545. result.
  546. #Declaration $Type := ( integer ! real )
  547. (+ $Id / $Rez ++:= <. $Id : $Type .> / + ',')
  548. / RETURN $Rez / ##
  549. Call #Declaration ( real X ',' Y ) returns value
  550. <. X : real, Y : real .>
  551. It should be noted that the pattern (* $E * ',' ) differs
  552. from the pattern (* $E ',' *) in the point that the presence of
  553. atom ',' is obligatory in the second pattern at the end of
  554. sequence.
  555. 7.3 Patterns for Alternative and Option
  556. The choice of several possible patterns is written the
  557. following way: ( S1 ! S2 ! ... ! SN )
  558. Patterns S1 , S2 , ... , SN are applied one by one from left
  559. to right, until one of them succeeds.
  560. In case of success alternative pattern yields value. It
  561. coincides with the value of the successful pattern within the
  562. alternative and may be assigned by some variable.
  563. Example. Simple arithmetic expression parsing. When
  564. successful, an expression tree is returned, which can be regarded
  565. as an intermediate form for the next compilation phases.
  566. #Expression $A1 := #Term
  567. (* $Op := ( '+' ! '-' ) $A2 := #Term
  568. / $A1 := <. op : $Op , arg1 : $A1 , arg2 : $A2 .> / *)
  569. / RETURN $A1 / ##
  570. #Term $A := ( $Id ! $Num ) / RETURN $A / ;;
  571. '(' $A := #Expression ')' / RETURN $A / ##
  572. The call #Expression( X '-' Y '+' 7 ) returns the value
  573. <. op: '+', arg1: <. op: '-', arg1: X, arg2: Y .>, arg2: 7 .>
  574. In RIGAL we may write a rule that matches successfully with
  575. an empty sequence of arguments:
  576. #empty ##
  577. Now the pattern for option can be written down: ( S ! #empty )
  578. In short form this issue may be written down in RIGAL the
  579. following way: [ S ] where S is some pattern or pattern sequence.
  580. Pattern [ S ] always ends in success.
  581. 7.4 Tree Pattern
  582. Tree pattern checks, whether the object is a tree with fixed
  583. structure. By means of this pattern access to the components of
  584. the tree is obtained. The tree pattern is described the following
  585. way:
  586. <. Sel1 : Pat1, Sel2 : Pat2, ... , SelN : PatN .>
  587. where
  588. Sel1, Sel2, ... SelN are atoms-identifiers, but Pat1, Pat2, ...
  589. PatN are patterns.
  590. If the object, to which the tree pattern is applied, is not a
  591. tree, then the application of the pattern fails at once. If there
  592. is the selector Sel1 in the tree, then the pattern Pat1 is
  593. applied to the corresponding object. If there is no selector Sel1
  594. in the tree or the application of Pat1 has failed, the whole
  595. pattern also fails.
  596. If matching the first branch was successful, branch matching
  597. of the pattern 'Sel2 : Pat2' , etc. begins.
  598. Hence, pattern branches are applied to the tree in the same
  599. order as they are written in the pattern. Therefore, the order of
  600. tree traversing may be controlled. It is possible to have
  601. reiterative visit of branches ( if selectors are repeatedly
  602. described in the tree pattern) or omission of branches (if
  603. corresponding selectors are not given in the pattern).
  604. In case of success the tree pattern returns the value, which
  605. coincides with the whole object - a tree, to which the pattern
  606. was applied, irrespective of presence of all tree selectors in
  607. the pattern or absence of some.
  608. Example. Let us suppose expression tree to be formed like in
  609. the above example. The task is to traverse the tree and return a
  610. list that represents the Polish postfix form of this expression.
  611. #Postfix_form <. arg1: $Rez := #Postfix_form,
  612. arg2: $Rez !!:= #Postfix_form,
  613. op: $Rez !.:= $Op .> / RETURN $Rez / ;;
  614. $Rez := ( $Id ! $Num ) / RETURN (. $Rez .) / ##
  615. The call
  616. #Postfix_form( <. op: '-',
  617. arg1: X,
  618. arg2: <. op:'+',
  619. arg1: Y,
  620. arg2: 5 .> .>)
  621. returns the value
  622. (. X Y 5 '+' '-'.)
  623. Some branches in the tree pattern may be described as
  624. optional, in this case they are enclosed in brackets '[' and ']'
  625. . If there is no selector of optional branch in the argument
  626. tree, its pattern is omitted and transition to next pattern
  627. branch takes place. If there is a selector of the type in the
  628. argument tree, the pattern branch is developed as usual.
  629. 7.5 Iterative Tree Pattern
  630. The simplest form of iterative tree pattern is the following:
  631. <* $Var : P *>
  632. where $Var is some
  633. variable, and P is a pattern.
  634. A loop over the tree is performed by the help of this
  635. pattern. All selectors of the argument tree are assigned to the
  636. variable $Var one by one. The pattern P is applied to each
  637. object, which corresponds in the argument tree to the current
  638. selector in the variable $Var. If even one application of the
  639. pattern P fails, the whole iterative tree pattern fails. For
  640. example,
  641. #Variable_table <* $Id : $E := ( integer ! real )
  642. / $R !.:= (. $Id $E .)/ *> / RETURN $R / ##
  643. Call example.
  644. #Variable_table( <. X : integer, Y : real, Z : real .> ) =
  645. (. (. X integer .) (. Y real .) (. Z real .) .)
  646. Sometimes, performing a loop over the tree, some branches
  647. should be updated in a special way. For this purpose iterative
  648. tree pattern with distinguished branches is used.
  649. <* Sel1 : Pat1, Sel2 : Pat2, ..., SelN : PatN, $Var : P *>
  650. where Sel1, Sel2, ... SelN are atoms-identifiers;
  651. Pat1, Pat2, ... PatN are patterns,
  652. i.e., like elements in simple tree pattern;
  653. $Var is a variable, but P is a pattern, as in simple case of
  654. iterative tree pattern.
  655. The pattern is applied to the argument tree the following
  656. way. First of all distinguished pattern branches 'Sel : Pat' are
  657. developed. Their matching with branches of the argument tree
  658. happens exactly the same way as with simple tree pattern. Then
  659. the element '$Var : P' is applied to other branches of the
  660. argument tree the same way as in simple iterative tree pattern.
  661. Some distinguished branches can be optional, for this purpose
  662. they are enclosed in brackets '[' and ']'. Semantics is the same
  663. as in the case of simple tree pattern.
  664. Example. Let it be a tree of arbitrary size. In some of its
  665. subtrees there is the selector LABEL, to which numerical atom is
  666. attached. All these numbers over the whole tree must be collected
  667. in a list, and the list must be returned as result. #Label_list
  668. <* [ LABEL : $Result !.:= $N ],
  669. $S : $Result !!:= #Label_list *> / RETURN $Result / ;;
  670. $E ##
  671. The rule has two branches. Traversing of the tree and its
  672. subtrees is described in the first branch. The resulting list is
  673. formed in the variable $Result. The traversing of subtrees is
  674. carried out by the help of recursive call of the rule
  675. #Label_list. The second branch of the rule consisting of just one
  676. pattern $E is applied at the leaves of the tree. The pattern
  677. matches successfully with any object and the whole branch returns
  678. value NULL, which is accepted as empty list at the previous level
  679. of recursion.
  680. Call sample.
  681. #Label_list ( <. A: <. LABEL: 5, B: abc .>,
  682. LABEL: 17, D: 25 .> ) = (. 17 5 .)
  683. 7.6 Names of Lists and Trees in Patterns
  684. The assignment of names was discussed in Section 5.
  685. In list and tree patterns we can have matching of list and
  686. tree names with the described values or simply get these names in
  687. the described variable. For this purpose the name may be
  688. indicated before list or tree pattern: atom :: pattern
  689. If the atom described in the pattern coincides with the name
  690. of the list (or tree), to which the pattern is applied, the
  691. application of the pattern to the argument begins. If it does
  692. not, other pattern elements are not applied and the pattern
  693. fails.
  694. To obtain access to the name of the argument, instead of atom
  695. we must indicate the variable, in which the name is assigned as
  696. value.
  697. 7.7 Patterns of Logical Condition Testing
  698. Pattern of the type: S' ( expression ) works the following
  699. way. First of all the expression is evaluated. If its value
  700. differs from NULL, the pattern is successful. The value of the
  701. matched argument is returned as the value of the pattern, if
  702. matching was successful. If the value of the expression equals to
  703. NULL, the pattern fails and returns NULL.
  704. The value of special variable $$ in the expression of
  705. S-pattern equals to the value of the argument, to which S-pattern
  706. is applied.
  707. The skip of token sequence until the nearest symbol ';' is
  708. described by the pattern: (* S' ( $$ <> ';' ) *)
  709. Let under parsing a case is accentuated when the assignment
  710. statement is in the form of X := X + E , where X is a variable,
  711. and E is an expression. This case could be described by pattern
  712. of the type: $Id ':=' S' ( $$ = $Id ) '+' #Expression
  713. Pattern of the type: V' ( expression ) works similar to
  714. S-pattern, yet in case of success no advancing along the sequence
  715. of rule arguments takes place, so the next pattern element is
  716. applied to the same argument.
  717. This pattern is useful for context condition check. Example.
  718. The pattern
  719. S' ( #NUMBER($$) and ( $$ > 7))
  720. may be substituted by
  721. a sequence of patterns
  722. $Num V'($Num > 7)
  723. 8. Statements
  724. 8.1 Assignment Statement
  725. In the left side of assignment statement a variable may be
  726. indicated, which is followed by an arbitrary number of list
  727. indexes and/or tree selectors. For example,
  728. $X := (. A B C .); $Y:= <. D : E, F : G .>;
  729. After assignment $X[2] := T the value of $X is (. A T C .) .
  730. After assignment $Y.D :=17 the value of $Y is <.D :17, F:G .>
  731. The execution of the statement $Y.A := T yields the run time
  732. error message. The necessary result is obtained the following
  733. way:
  734. $Y ++:= <. A : T .>;
  735. Tree branch is deleted by assigning an empty object to the
  736. corresponding selector: $Y.D := NULL;
  737. 8.2 Conditional Statement
  738. Conditional statement has the following form:
  739. IF expression -> statements
  740. Then branches may
  741. follow (it is not compulsory)
  742. ELSIF expression -> statements
  743. Conditional
  744. statement ends with keyword FI.
  745. In conditional statement branches expressions are computed
  746. one by one, until a value different from NULL is obtained. Then
  747. the statements described in this branch are executed.
  748. 8.3 FAIL Statement
  749. FAIL statement finishes the execution of the rule branch with
  750. failure. Example. In order to repair errors in parsing process,
  751. the sequence of tokens should be skipped quite frequently, for
  752. instance, until semicolon symbol. It is done the following way.
  753. #statement ... ;; -- branches for statement analysis
  754. (* #Not_semicolon *) ';' -- no statement is recognised
  755. ##
  756. #Not_semicolon $E / IF $E = ';' -> FAIL FI/ ##
  757. 8.4 Loop Statements
  758. Statement of the types
  759. FORALL $VAR IN expression
  760. DO statements OD
  761. FORALL SELECTORS $VAR BRANCHES $VAR1 IN expression
  762. DO statements OD
  763. FORALL BRANCHES $VAR1 IN expression
  764. DO statements OD
  765. loops over a list or a tree.
  766. The value of the expression must be either a list or a tree.
  767. Value of the current list element (if the loop is over the list)
  768. or value of the current selector (if the loop is over the tree)
  769. is assigned to the loop variable $VAR one by one. The
  770. corresponding branch value is assigned to the variable $VAR1 (if
  771. the loop is over the tree). Statements, describing body of the
  772. loop, may use the current value of the variables $VAR and $VAR1.
  773. Loop statement of the type
  774. LOOP statements END;
  775. repeats statements of
  776. the loop body, until one of the statements - BREAK, RETURN or
  777. FAIL is not executed.
  778. 8.5 Rule Call
  779. If a rule is called just to execute statements described in
  780. it, and value returned by the rule is not necessary, the rule
  781. call is written down as statement. It is analogous to procedure
  782. call in traditional programming languages. Success/failure of the
  783. rule and value returned by it is disregarded in such a call.
  784. 9. Input and Output
  785. 9.1 SAVE and LOAD Statements
  786. Objects created by RIGAL program (atoms, lists, trees) can be
  787. saved in the file and loaded back to the memory.
  788. Statement
  789. SAVE $Var file-specification
  790. unloads the object,
  791. which is the value of the variable $Var to the file with the
  792. given specification. File, formed by SAVE statement, contains
  793. precisely one object (atom, list or tree).
  794. We can load the object from the file in the memory having
  795. executed statement:
  796. LOAD $Var file-specification
  797. 9.2 Text Output
  798. To output texts (messages, generated object codes, etc. )
  799. several text files can be opened in the RIGAL program. The text
  800. file FFF is opened by statement: OPEN FFF file-specification
  801. File-specification may be an expression. It presents the name of
  802. the file on the device.
  803. Statement of the type
  804. FFF << Expr1 Expr2 ... ExprN
  805. outputs a sequence
  806. of atoms to the file FFF. Values of expressions Expr1, Expr2, ...
  807. are either atoms or lists consisting of objects, different from
  808. trees.
  809. This statement outputs atoms as sequences of symbols to the
  810. text file, inserting a blank after every atom. Atoms in the list
  811. are output in the same order as they are in the list.
  812. Example. FFF << A B 12 ;
  813. A string of characters is output in the text file FFF the
  814. following way: "A B 12 "
  815. Symbol @ in the sequence of expressions of output statement
  816. switches over to other output mode of blanks separating atoms. By
  817. default at the beginning of execution of text output statement
  818. the output mode with blanks is on.
  819. Example. FFF << A B @ C D 25 @ E F 57 ;
  820. The following string of characters is output to the text file
  821. "A B CD25E F 57"
  822. Statement of the type
  823. FFF << ...
  824. always begins output with
  825. the beginning of a new record. Output statement of the type
  826. FFF <] ...
  827. continues output in the current record.
  828. By the help of the statement of the type
  829. PRINT expression
  830. the value of atom, list
  831. or tree can be output in a readable form on the display, disk or
  832. on the printer. It is useful when debugging RIGAL programs.
  833. 10. Program Structure
  834. Program written in the RIGAL language consists of the main
  835. program and rules. The main program text must be at the beginning
  836. of the RIGAL program text, but the text of the rules is written
  837. afterwards. Main program, as well as rule, begins by the name
  838. indication in the form of #main_program_name, then statements are
  839. described, separated by symbols ';' . The end of the main program
  840. is marked by the symbol '##'.
  841. Usually operations that deal with the initial object loading,
  842. rule call and unloading of created objects in the files are
  843. concentrated in the main program. Therefore, main program has no
  844. pattern elements or arguments of its own.
  845. When RIGAL is used for parsing, text file with the input
  846. information first is updated by scanner, which transforms it into
  847. a list of tokens. Thus rules describing required parsing can be
  848. applied to this list. Intermediate results of the RIGAL program,
  849. for instance, abstract syntax trees obtained by parsing can be
  850. unloaded in the file by the help of SAVE statement. They are
  851. unloaded so, that other RIGAL programs (for instance, those,
  852. which implement phase of code generation in compilers) can load
  853. them as input data. Sample of main program is given in Section
  854. 12.4.
  855. Rules can be written down after main program in any order.
  856. 10.1 Local Variables
  857. There are no special variable declarations in RIGAL. The fact
  858. that a variable is used in some statement, pattern or expression
  859. implies that the variable is defined as local variable of the
  860. rule or the main program.
  861. For recursive rule calls local rule variables are pushed in a
  862. stack, so, that every active rule instance has its own set of
  863. local variables.
  864. All variables are initialized by the NULL value, when the
  865. corresponding rule or the main program is called and when the
  866. execution of rule branch starts.
  867. 10.2 References to Variables of Other Rules
  868. The construction of RIGAL makes it possible to obtain access
  869. from the rule to local variables of another rule. It has the
  870. following form: LAST #L $X
  871. This reference denotes the value of the variable $X in the
  872. last (in time ) and still active instance of the rule #L. Such
  873. references can be used in left and right sides of assignment
  874. statement.
  875. If at the moment of evaluation of the expression LAST #L $X
  876. there is no active instance of the rule #L, the value of the
  877. expression LAST #L $X equals NULL. If such LAST #L $X is in the
  878. left side of assignment statement, the statement is disregarded
  879. (and run time error message is output).
  880. By the help of LAST we may refer to both rule and main
  881. program variables.
  882. 10.3 Attribute Grammars and RIGAL. Global Attributes
  883. There is a close analogy between attribute grammar and RIGAL
  884. program. Rules in RIGAL correspond to grammar nonterminals, and
  885. variables - to attributes.
  886. In attribute grammars the greatest part of attributes are
  887. used as transit attributes. To avoid this global attributes are
  888. introduced in the attribute grammar implementations. The usage of
  889. LAST references solves this problem in RIGAL.
  890. Implementation of both synthesized and inherited attributes
  891. is possible as it is demonstrated in the following scheme.
  892. #LA ... ... ...
  893. assigns value to the attribute $A
  894. calls #LB ... ... ... ##
  895. #LB ... ... ...
  896. $B1 := LAST #LA $A -- uses the inherited attribute $A from #LA
  897. calls #LC
  898. -- after this call the value of the attribute $C
  899. -- from #LC is assigned to the synthesized attribute $B2
  900. ... ... ... ##
  901. #LC ... ... ...
  902. assigns value to the attribute $C
  903. LAST #LB $B2 := $C -- the value is assigned to the
  904. -- synthesized attribute $B2 of #LB
  905. ... ... ... ##
  906. 11. Built-in Rules
  907. There is a number of built-in rules in the language. These
  908. rules implement functions, the implementation of which is
  909. impossible or ineffective by other language means.
  910. Call of built-in rules is written down the same way as call
  911. of rules defined by the user itself. Along with the value a
  912. built-in rule yields success or failure hence, built-in rules are
  913. used as patterns.
  914. There are predicates such as #ATOM(E), #NUMBER(E), #IDENT(E),
  915. #LIST(E) and #TREE(E).
  916. The built-in rule #LEN(E) returns numerical atom as value. If
  917. E is an atom, then for non-numerical atoms it returns the number
  918. of atom symbols. For numerical atoms this rule returns the number
  919. of significant digits plus 1, if the atom is a negative number.
  920. #LEN( NULL) equals 0, #LEN('ABC') equals 3, #LEN(-185) equals 4.
  921. If E is a list, then #LEN(E) returns the number of list
  922. elements, but, if E is a tree, then it returns the number of tree
  923. branches.
  924. #EXPLODE(E). If E is an atom, then it succeeds and returns
  925. one character atom list that represents the value E 'decomposed'
  926. in separate characters. If E is a numerical atom, only
  927. significant digits are present.
  928. Examples.#EXPLODE(X25) yields (. 'X' '2' '5' .).
  929. #EXPLODE(-34) yields (. '-' '3' '4' .).
  930. #IMPLODE(E1 E2 ... EN). This rule yields the concatenation of
  931. atoms or lists E1, E2, ..., EN in a new, non-numerical atom.
  932. Examples.#IMPLODE( A B 34) equals 'AB34'.
  933. #IMPLODE(25 (. A -3 .) ) equals '25A-3'.
  934. #CHR(N). The rule returns an atom, which consists of just one
  935. ASCII character with the code N ( 0 <= N <= 127).
  936. #ORD(A). Returns an integer, which is an internal code of the
  937. first character of the nonnumerical atom A.
  938. For instance, #ORD( A) = 65, #ORD( ABC) = 65.
  939. #PARM(T) . Returns list of parameters which was assigned when
  940. the whole program called for execution.
  941. #DEBUG(E). If E equals the atom 'RULES', then, as soon as the
  942. rule is called, information concerning calls of the rules (both
  943. user defined and built-in rules) and their execution results will
  944. be output. The call #DEBUG(NORULES) stops the debugging of the
  945. rules.
  946. 12. Sample Compiler
  947. Compiler for the TOYLAN language, which is a very simple
  948. programming language, is discussed in the following units.
  949. Compiler works in two passes. The first phase is parsing and
  950. construction of the program's intermediate form as abstract
  951. syntax tree. The second phase is code generation.
  952. Description of input and intermediate language grammars by
  953. means of RIGAL is presented. Thus formalized compiler
  954. documentation (admitting checking on a computer) is obtained.
  955. 12.1 TOYLAN Language
  956. The description of TOYLAN syntax can be regarded as
  957. description of acceptable sequences of tokens. Atoms represent
  958. lexical elements of TOYLAN program: keywords, identifiers,
  959. constants, operation signs, delimiters. The context free grammar
  960. of TOYLAN can be described in the form of RIGAL program.
  961. #PROGRAM
  962. 'PROGRAM' $Id
  963. (* #DECLARATION ';' *) (+ #STATEMENT + ';' ) ##
  964. #DECLARATION ('INTEGER'!'BOOLEAN') (+ $Id + ',') ##
  965. #STATEMENT ( #ASSIGNMENT ! #INPUT !
  966. #OUTPUT ! #CONDITIONAL ) ##
  967. #ASSIGNMENT $Id ':=' #EXPRESSION ##
  968. #INPUT 'GET' '(' (+ $Id + ',') ')' ##
  969. #OUTPUT 'PUT' '(' (+ #EXPRESSION + ',') ')' ##
  970. #CONDITIONAL 'IF' #EXPRESSION 'THEN' (+ #STATEMENT + ';')
  971. ['ELSE' (+ #STATEMENT + ';' )] 'FI' ##
  972. #EXPRESSION #SUM [ '=' #SUM ] ##
  973. #SUM #FACTOR (* '+' #FACTOR *) ##
  974. #FACTOR #TERM (* '*' #TERM *) ##
  975. #TERM $N ;; -- numeric constant
  976. ('TRUE' ! 'FALSE' ) ;; -- Boolean constants
  977. $Id;; -- variable
  978. '(' #EXPRESSION ')' ##
  979. Context conditions are the following:
  980. 1) all variables used in statements and expressions must be
  981. declared,
  982. 2) one and the same variable name should not be declared
  983. twice,
  984. 3) left and right parts of assignment statement must be of
  985. the same type,
  986. 4) operands of input-output statements must be of the type
  987. INTEGER.
  988. All variables of the type INTEGER have initial value 0, and
  989. all variables of the type BOOLEAN have initial value FALSE.
  990. 12.2 Intermediate Form of Program.
  991. Abstract syntax tree
  992. Special languages to represent intermediate results of
  993. compilation are used in compiler building practice. For instance,
  994. P-code for PASCAL compilers and language DIANA [8] for ADA.
  995. The result of the first phase of the TOYLAN compiler is a
  996. tree. Let's call it abstract syntax tree. Along program
  997. components it contains some semantic attributes, for instance,
  998. types of expressions. One of the most significant attributes is
  999. table of variables, obtained as a result of parsing of the TOYLAN
  1000. program declarations.
  1001. The structure of abstract syntax tree of the TOYLAN program
  1002. is described by the following rules.
  1003. #S_PROGRAM
  1004. 'PROGRAM'::<. NAME : $Id,
  1005. DECLARATIONS : #S_DECLARATIONS ,
  1006. STATEMENTS : (.(* #S_STATEMENT *).) .> ##
  1007. #S_DECLARATIONS -- variables table
  1008. <* $Id : ( INTEGER ! BOOLEAN ) *> ##
  1009. #S_STATEMENT
  1010. ASSIGNMENT :: <. LEFT : $Id,
  1011. RIGHT : #S_EXPRESSION .> ;;
  1012. INPUT :: (. (* $Id *) .) ;;
  1013. OUTPUT :: (. (* #S_EXPRESSION *) .) ;;
  1014. CONDITIONAL :: <. COND : #S_EXPRESSION,
  1015. THEN : (.(* #S_STATEMENT *).),
  1016. [ ELSE : (.(* #S_STATEMENT *).)] .> ##
  1017. #S_EXPRESSION
  1018. COMPARE :: <. ARG1 : #S_EXPRESSION, ARG2 : #S_EXPRESSION,
  1019. TYPE : BOOLEAN .> ;;
  1020. ADD :: <. ARG1 : #S_EXPRESSION, ARG2 : #S_EXPRESSION,
  1021. TYPE : INTEGER .> ;;
  1022. MULT :: <. ARG1 : #S_EXPRESSION, ARG2 : #S_EXPRESSION,
  1023. TYPE : INTEGER .> ;;
  1024. <. VARIABLE : $Id , TYPE : ( INTEGER ! BOOLEAN ) .> ;;
  1025. <. CONSTANT : $N , TYPE : INTEGER .> ;;
  1026. <. CONSTANT : ( 0 ! 1) , TYPE : BOOLEAN .> ##
  1027. 12.3 Target Language BAL
  1028. The goal of the TOYLAN compiler is to obtain program text in
  1029. a low level language BAL. This language is a simplified model of
  1030. assembler languages. The memory of BAL-machine is divided into
  1031. separate words. Every word may contain an integer, besides, there
  1032. are work registers R0, R1, R2, ... of the word size each. Let us
  1033. suppose the number of registers to be unlimited, in order to
  1034. eliminate the problem of optimal register usage during generation
  1035. phase.
  1036. Command of BAL ABC: DEFWORD N reserves a word in the memory
  1037. and imbeds integer N in it. We can refer to this word in other
  1038. commands by name ABC.
  1039. Commands of BAL have two operands which are described by the
  1040. name of memory word, by the name of register or by the literal of
  1041. the type =NNN, where NNN is an integer. Commands can be marked by
  1042. labels.
  1043. 1) MOV A1,A2 This command moves memory word A1 to memory word A2.
  1044. 2) LOAD RI,A Loading of word A into register RI.
  1045. 3) SAVE RI,A Unloading of the contents of register RI into memory
  1046. word A.
  1047. 4) ADD RI,A or ADD RI,RJ The sum of operands is imbedded in RI.
  1048. 5) MULT RI,A or MULT RI,RJ Multiplication of operands is
  1049. imbedded in RI.
  1050. 6) COMPARE RI,A or COMPARE RI,RJ If operand values are equal,
  1051. it is 1 that is imbedded in RI, if they are not
  1052. equal, 0 is imbedded.
  1053. 7) BRANCH RI,M If the value of RI is equal to 0, then transfer
  1054. to the command marked by label M takes place,
  1055. otherwise, to the next command.
  1056. 8) JUMP M Unconditional transfer to the command marked by label M.
  1057. 9) EOJ Completes the execution of the BAL program.
  1058. 10) READ A Reads the integer from standard input device and
  1059. imbeds it in word A.
  1060. 11) WRITE A or WRITE RI Outputs the integer from memory word or
  1061. from register to standard output device.
  1062. 12) NOP An empty statement.
  1063. 12.4 Main Module of Compiler
  1064. The main program of the TOYLAN compiler contains calls of the
  1065. first and second compilation phases and file opening statements.
  1066. #TOYLAN_COMPILER
  1067. OPEN REP ' '; --message file is connected with the screen
  1068. $LEXEMS:=#CALL_PAS(35 'A.TOY');
  1069. -- a list of tokens is loaded from the file A.TOY by scanner
  1070. $S_TREE := #A_PROGRAM($LEXEMS);
  1071. -- 1st phase; result of parsing - abstract syntax tree - is
  1072. -- imbedded in the variable $S_TREE; during parsing messages
  1073. -- about discovered errors in file REP can be output.
  1074. IF $S_TREE -> OPEN GEN 'A.BAL'; -- if the tree is created,
  1075. -- then file is opened to output the generated BAL text
  1076. #G_PROGRAM($S_TREE) -- 2nd phase - code generation
  1077. ELSIF T -> REP << errors are discovered FI;
  1078. REP << end of compilation ##
  1079. The compilation listing that contains source text and error
  1080. messages is not envisaged in the TOYLAN compiler. Formation of
  1081. the listing can be a separate phase.
  1082. 12.5 Parsing Phase
  1083. The rule #A_PROGRAM carries out parsing of tokens list,
  1084. checks context conditions, generates error messages and builds
  1085. abstract syntax tree of TOYLAN program.
  1086. Patterns of the rule #A_PROGRAM and of the rules subordinate
  1087. to it, actually, coincide with patterns of the rule #PROGRAM and
  1088. with the associated rules that describe context free grammar of
  1089. the TOYLAN language. Just operations to check context conditions,
  1090. to output error messages and to construct abstract syntax tree
  1091. are added.
  1092. In our parser diagnostics is based on the following
  1093. principles. First of all, "panic" reaction to an error should be
  1094. avoided and several messages concerning one and the same error
  1095. should not be output (though, we can't manage it always),
  1096. secondly, error neutralization is transition to the analysis of
  1097. the next statement, i.e., skip of tokens until the nearest symbol
  1098. ';'.
  1099. #A_PROGRAM -- the rule is applied to the list of tokens
  1100. (. PROGRAM $Id
  1101. (* $DECL++:= #A_DECLARATION ';' *)
  1102. --formation of variables table
  1103. (+ $STATEMENTS !.:= #A_STATEMENT + ';' )
  1104. --formation of statements list
  1105. .) / RETURN 'PROGRAM' :: <. NAME : $Id,
  1106. DECLARATIONS : $DECL ,
  1107. STATEMENTS : $STATEMENTS .>/ ##
  1108. #A_DECLARATION $TYPE := ( INTEGER ! BOOLEAN )
  1109. (+ $Id /IF LAST #A_PROGRAM $DECL.$Id OR $REZ.$Id ->
  1110. REP << VARIABLE $Id DOUBLE DEFINED FI;
  1111. $REZ++:= <.$Id : $TYPE .>/ + ',' ) / RETURN $REZ / ##
  1112. #A_STATEMENT $REZ := ( #A_ASSIGNMENT ! #A_INPUT !
  1113. #A_OUTPUT ! #A_CONDITIONAL ) / RETURN $REZ / ;;
  1114. (* $A!.:=S'($$ <> ';' ) *) -- skip until nearest ';'
  1115. / REP << UNRECOGNIZED STATEMENT $A /
  1116. ##
  1117. #A_ASSIGNMENT $Id ':='/ $LPType := LAST #A_PROGRAM $DECL .$Id;
  1118. IF NOT $LPType -> REP << VARIABLE $Id ' IS NOT DEFINED ' FI /
  1119. $E:= #A_EXPRESSION
  1120. /IF $LPType <> $E . TYPE ->
  1121. REP<< 'LEFT AND RIGHT SIDE TYPES ARE DIFFERENT '
  1122. 'IN ASSIGNMENT STATEMENT ' FI;
  1123. RETURN ASSIGNMENT::<. LEFT: $Id, RIGHT: $E .> /
  1124. ONFAIL IF $LPType -> REP<< 'WRONG EXPRESSION IN ASSIGNMENT' FI ##
  1125. #A_INPUT GET '('
  1126. (+ $E !.:= $Id /IF LAST #A_PROGRAM $DECL.$Id <> INTEGER ->
  1127. REP << $Id 'IN STATEMENT GET IS NOT OF THE TYPE INTEGER'
  1128. FI / + ',' ) ')' / RETURN INPUT :: $E / ##
  1129. #A_OUTPUT PUT '(' (+ $C := #A_EXPRESSION / $E !.:= $C;
  1130. IF $C . TYPE <> INTEGER ->
  1131. REP << OPERAND OF PUT STATEMENT IS NOT OF THE TYPE INTEGER
  1132. FI / + ',' ) ')'/ RETURN OUTPUT :: $E / ##
  1133. #A_CONDITIONAL 'IF' $BE := #A_EXPRESSION
  1134. /IF $BE . TYPE <> BOOLEAN ->
  1135. REP<< CONDITION IS NOT OF BOOLEAN TYPE FI /
  1136. 'THEN' (+ $P1 !.:= #A_STATEMENT + ';' )
  1137. [ 'ELSE' (+ $P2 !.:= #A_STATEMENT + ';' ) ] 'FI'
  1138. / RETURN CONDITIONAL :: <. COND : $BE , THEN : $P1 ,
  1139. ELSE : $P2 .> / ##
  1140. #A_EXPRESSION $A := #A_SUM [ '=' $B := #A_SUM
  1141. / $A := COMPARE::<. ARG1 : $A, ARG2 : $B, TYPE : BOOLEAN.>/ ]
  1142. / RETURN $A / ##
  1143. #A_SUM $A := #A_FACTOR (* '+' $B := #A_FACTOR
  1144. / $A := ADD::<. ARG1: $A, ARG2: $B, TYPE: INTEGER .>/ *)
  1145. / RETURN $A / ##
  1146. #A_FACTOR $A := #A_TERM (* '*' $B := #A_TERM
  1147. /$A := MULT::<. ARG1: $A, ARG2: $B, TYPE: INTEGER .>/ *)
  1148. / RETURN $A / ##
  1149. #A_TERM
  1150. $N / RETURN <. CONSTANT : $N , TYPE : INTEGER .>/;;
  1151. ( ( TRUE / $K :=1/ ) ! ( FALSE / $K :=0 / ) )
  1152. /RETURN <. CONSTANT: $K, TYPE: BOOLEAN .>/ ;;
  1153. $Id / $X:= LAST #A_PROGRAM $DECL.$Id;
  1154. IF NOT $X -> REP << VARIABLE $Id IS NOT DECLARED
  1155. ELSIF T -> RETURN <. VARIABLE: $Id, TYPE: $X .> FI / ;;
  1156. '(' $E := #A_EXPRESSION ')' / RETURN $E / ##
  1157. 12.6 Code Generation Phase
  1158. Code generation is performed when traversing abstract syntax
  1159. tree.
  1160. To avoid possible conflicts between variable names in the
  1161. TOYLAN program and register names (of the type RNNN) and labels
  1162. (of the type LNNN) in the object program, variable names are
  1163. substituted by standard names of the type VARNNN.
  1164. #G_PROGRAM / $LABEL := 0 / --global variable $LABEL serves
  1165. --to generate unique labels.
  1166. PROGRAM::<.DECLARATIONS: $TAB := #TABLE_OF_NUMBERS,
  1167. --creation of the table of unique variable numbers
  1168. STATEMENTS: (.(* #G_STATEMENT *).) / GEN << 'EOJ' /,
  1169. DECLARATIONS : #G_DECLARATIONS .> ##
  1170. #TABLE_OF_NUMBERS <* $Id: $TYPE /$N :=$N+1; $T++:=<. $Id: $N.>/ *>
  1171. /RETURN $T/ ##
  1172. #G_STATEMENT ( #G_ASSIGNMENT ! #G_INPUT !
  1173. #G_OUTPUT ! #G_CONDITIONAL ) ##
  1174. #G_ASSIGNMENT ASSIGNMENT::<. LEFT: $Id := #NAME,
  1175. RIGHT :( ( <. VARIABLE: $Id1:=#NAME .>
  1176. /GEN << MOV @ $Id1 ',' $Id / ) !
  1177. ( <. CONSTANT : $N .> /GEN << MOV @ '=' $N ',' $Id /) !
  1178. ( $NREG := #G_EXPRESSION
  1179. /GEN << 'SAVE' @ 'R' $NREG ',' $Id / ) ) .> ##
  1180. #G_INPUT INPUT::(. (* $Id := #NAME /GEN << READ $Id / *) .) ##
  1181. #G_OUTPUT OUTPUT :: (. (*
  1182. ( ( <. VARIABLE : $Id := #NAME .> /GEN << WRITE $Id / ) !
  1183. ( <. CONSTANT : $N .> /GEN << WRITE @ '=' $N / ) !
  1184. ( $NREG := #G_EXPRESSION /GEN << WRITE @ 'R' $NREG /) )
  1185. *) .) ##
  1186. #G_CONDITIONAL CONDITIONAL ::
  1187. <. COND : $NREG := #G_EXPRESSION
  1188. / $LABEL1 :=#NEW_LABEL(); $LABEL2 :=#NEW_LABEL() /,
  1189. THEN : / GEN << BRANCH @ 'R' $NREG ',L' $LABEL1 /
  1190. (. (* #G_STATEMENT *) .)
  1191. / IF $.ELSE -> GEN << JUMP @ 'L' $LABEL2 FI;
  1192. GEN << @ 'L' $LABEL1 ': NOP' / ,
  1193. [ ELSE : (. (* #G_STATEMENT *) .)
  1194. / GEN << @ 'L' $LABEL2 ': NOP' / ] .> ##
  1195. #G_EXPRESSION --returns the number of the register containing
  1196. --result of the evaluation of expression
  1197. $EXPR
  1198. / $NREG := 0 / -- number of the first accessible register
  1199. /RETURN #G_EXPR($EXPR)/
  1200. ##
  1201. #G_EXPR ( <. VARIABLE: $ID :=#NAME .> !
  1202. <. CONSTANT: $N / $ID := #IMPLODE('=' $N)/ .>)
  1203. / $REG := COPY( LAST #G_EXPRESSION $NREG ) ;
  1204. GEN << 'LOAD' @ 'R' $REG ',' $ID ;
  1205. LAST #G_EXPRESSION $NREG + := 1; RETURN $REG / ;;
  1206. $OP::<. ARG1 : $R1 := #G_EXPR, ARG2 : $R2 := #G_EXPR .>
  1207. / GEN << $OP @ 'R' $R1 ',R' $R2 ; RETURN $R1 / ##
  1208. #G_DECLARATIONS
  1209. <* $ID: $TYPE /$ID1 := #NAME($ID); GEN<< $ID1 ':' DEFWORD 0 /*> ##
  1210. #NEW_LABEL --auxiliary rule
  1211. /LAST #G_PROGRAM $LABEL+:=1;
  1212. RETURN COPY (LAST #G_PROGRAM $LABEL )/ ##
  1213. #NAME $ID --returns standard name of the variable $ID in $TAB
  1214. / RETURN #IMPLODE( VAR LAST #G_PROGRAM $TAB.$ID)/ ##
  1215. 13. Conclusions and Future Work
  1216. As it was demonstrated above, RIGAL supports syntax-oriented
  1217. style of compiler design. Programs written in RIGAL are
  1218. well-structured and it is easy to read and debug them.
  1219. Our experience [13] proves that the optimizing RIGAL compiler
  1220. in VAX/VMS environment makes it possible to implement production
  1221. quality compilers for high level languages.
  1222. RIGAL can be considered as yet another language prototyping
  1223. tool in the sense of [14], because it allows the designer to
  1224. develop an experimental translator in short period of time.
  1225. Besides interpreter for debugging purposes and optimizing
  1226. compiler RIGAL support system includes a cross-referencer, which
  1227. helps to avoid misuse of global variables.
  1228. In order to improve static and dynamic type checking,
  1229. variable type descriptions in the form of formal comments would
  1230. be added to the language.
  1231. Taking in account that control structures of RIGAL program
  1232. are very close to input data structures, it seems promising to
  1233. develop automatic and semiautomatic methods for test example
  1234. generation for the given RIGAL program.
  1235. References
  1236. [1] A.Aho, J.Ullman. The theory of parsing, translation and
  1237. compiling// Prentice-Hall, Inc. Englewood Cliffs,N.J. 1972. -
  1238. vol.1,2.
  1239. [2] S.C.Johnson. YACC - Yet Another Compiler Compiler // Bell
  1240. Laboratories, Murray Hill,N.J., 1978, A technical manual.
  1241. [3] C.H.Koster. Using the CDL Compiler Compiler// Lecture Notes
  1242. in Computer Science , Vol.21, Springer-Verlag, Berlin, 1977.
  1243. [4] I.R.Agamirzyan. Compiler Design Technological Support System SHAG. // Space
  1244. mechanics algorithms, Leningrad, vol. 79,1985, pp. 1-53.,(in Russian)
  1245. [5] D.E.Knuth. Semantics of context-free languages// Mathematical
  1246. Systems Theory, 2, 2, 1968, pp.127-146.
  1247. [6] V.A.Serebryakov. Methods of Attribute Translation.// In:
  1248. Programming Languages, Moscow, "Nauka", 1985, pp. 47-79,(in Russian).
  1249. [7] A.O.Vooglaid, M.V. Lepp, D.B.Lijb. Input Languages of the
  1250. ELMA System. // Proceedings of the Tallin Polytechnical Institute,
  1251. (in Russian).
  1252. [8] The intermediate language DIANA : Design and Implementation //Lecture
  1253. Notes in Computer Science, Vol.180, Springer-Verlag,
  1254. Berlin, 1984.
  1255. [9] R.Vilhelm. Presentation of the compiler generation system MUG2:
  1256. Examples, global flow analysis and optimization// Le point
  1257. sur la compilation, INRIA, 1978, à.307-336.
  1258. [10] Basic REFAL and its implementation on computers.// CNIPIASS,
  1259. Moscow, 1977, (in Russian).
  1260. [11] P.Lucas. Formal definition of programming languages and
  1261. systems // IFIP Congress, 1971.
  1262. [12] M.Ganapatti, C.N.Fisher, J.L.Hennessy. Retargetable compiler
  1263. code generation// ACM Computing Survays, 14(4), 1982.
  1264. [13] J.Barzdin, A.Kalnins, M.Auguston, SDL tools for rapid
  1265. prototyping and testing.// in SDL'89 : The language at work,
  1266. ed. O.Faergemand and M.M.Marques, North-Holland, 1989,
  1267. pp.127-133.
  1268. [14] R.Herndon, V.Berzins, The realizable benefits of a language
  1269. prototyping language.// IEEE Transactions on Software
  1270. Engineering , vol.14, No 6, June 1988, pp.803-809