From 7d010a9891c42f6e60eb18cbf5b75be9878a8336 Mon Sep 17 00:00:00 2001 From: H.G. Muller Date: Sat, 6 Nov 2010 14:43:54 +0100 Subject: [PATCH] Allow arbitrary nesting of sub-variations in PGN input The parsing of subvariations is moved out of the state-machine parsing, to allow arbitrary nesting. Instead of the entire sub-variation being parsed by lex as Comment, it now returns Open and Close for the corresponding parentheses, which are then handled in a wrapper routine for yylex(): Myylex(). The problem was that yy_text is supposed to contain the entire parsed text of the returned symbol, which thus has to be collected. To make this possible, yylex has to return everything that it parses (even white space!), and a symbol "Nothing" was added to return in parsing rules that previously just consumed irrelevant text without returning anything. --- backend.c | 18 +++++++++--------- common.h | 2 +- gamelist.c | 4 ++-- parser.h | 2 +- parser.l | 39 ++++++++++++++++++++++++++++++--------- pgntags.c | 2 +- 6 files changed, 44 insertions(+), 23 deletions(-) diff --git a/backend.c b/backend.c index b041f98..01d447e 100644 --- a/backend.c +++ b/backend.c @@ -8222,7 +8222,7 @@ ParseGameHistory(game) yynewstr(game); for (;;) { yyboardindex = boardIndex; - moveType = (ChessMove) yylex(); + moveType = (ChessMove) Myylex(); switch (moveType) { case IllegalMove: /* maybe suicide chess, etc. 
*/ if (appData.debugMode) { @@ -9671,7 +9671,7 @@ LoadGameOneMove(readAhead) } else { if (gameFileFP == NULL) return FALSE; - moveType = (ChessMove) yylex(); + moveType = (ChessMove) Myylex(); } done = FALSE; @@ -10166,7 +10166,7 @@ LoadGame(f, gameNumber, title, useList) cm = lastLoadGameStart = EndOfFile; while (gn > 0) { yyboardindex = forwardMostMove; - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); switch (cm) { case EndOfFile: if (cmailMsgLoaded) { @@ -10219,7 +10219,7 @@ LoadGame(f, gameNumber, title, useList) if (gn > 0) { do { yyboardindex = forwardMostMove; - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); } while (cm == PGNTag || cm == Comment); } break; @@ -10258,7 +10258,7 @@ LoadGame(f, gameNumber, title, useList) /* Skip any header junk before position diagram and/or move 1 */ for (;;) { yyboardindex = forwardMostMove; - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); if (cm == EndOfFile || cm == GNUChessGame || cm == XBoardGame) { @@ -10331,7 +10331,7 @@ LoadGame(f, gameNumber, title, useList) } yyboardindex = forwardMostMove; - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); /* Handle comments interspersed among the tags */ while (cm == Comment) { @@ -10341,7 +10341,7 @@ LoadGame(f, gameNumber, title, useList) p = yy_text; AppendComment(currentMove, p, FALSE); yyboardindex = forwardMostMove; - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); } } @@ -10418,7 +10418,7 @@ LoadGame(f, gameNumber, title, useList) } } yyboardindex = forwardMostMove; - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); } if (first.pr == NoProc) { @@ -10444,7 +10444,7 @@ LoadGame(f, gameNumber, title, useList) p = yy_text; AppendComment(currentMove, p, FALSE); yyboardindex = forwardMostMove; - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); } if ((cm == EndOfFile && lastLoadGameStart != EndOfFile ) || diff --git a/common.h b/common.h index aa46600..63cee7d 100644 --- a/common.h +++ b/common.h @@ -263,7 +263,7 @@ typedef enum { 
WhiteDrop, BlackDrop, NormalMove, AmbiguousMove, IllegalMove, ImpossibleMove, WhiteWins, BlackWins, GameIsDrawn, GameUnfinished, - GNUChessGame, XBoardGame, MoveNumberOne, + GNUChessGame, XBoardGame, MoveNumberOne, Open, Close, Nothing, Comment, PositionDiagram, ElapsedTime, PGNTag, NAG } ChessMove; diff --git a/gamelist.c b/gamelist.c index 028b961..5b480d7 100644 --- a/gamelist.c +++ b/gamelist.c @@ -225,7 +225,7 @@ int GameListBuild(f) do { yyboardindex = 0; offset = yyoffset(); - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); switch (cm) { case GNUChessGame: if ((error = GameListNewGame(¤tListGame))) { @@ -280,7 +280,7 @@ int GameListBuild(f) do { yyboardindex = 1; offset = yyoffset(); - cm = (ChessMove) yylex(); + cm = (ChessMove) Myylex(); if (cm == PGNTag) { ParsePGNTag(yy_text, ¤tListGame->gameInfo); } diff --git a/parser.h b/parser.h index 57296e1..fe105b2 100644 --- a/parser.h +++ b/parser.h @@ -53,7 +53,7 @@ extern void yynewfile P((FILE *f)); extern void yynewstr P((char *s)); -extern int yylex P((void)); +extern int Myylex P((void)); // [HGM] yylex now globally invisible, all calls must use wrapper extern ChessMove yylexstr P((int boardIndex, char *s, char *buf, int buflen)); extern char currentMoveString[]; extern int yyboardindex; diff --git a/parser.l b/parser.l index 6dcd13b..4ac67dc 100644 --- a/parser.l +++ b/parser.l @@ -911,10 +911,11 @@ extern void CopyBoard P((Board to, Board from)); return (int) GameUnfinished; } -[1-9][0-9]*/"."?[ \t\n]*[a-lNnPpRrBQqKACFEWDGHOo] { +[1-9][0-9]*/"."?[ \t\n]*[a-lnprqoA-Z+] { /* move numbers */ if ((yyleng == 1) && (yytext[0] == '1')) return (int) MoveNumberOne; + else return (int) Nothing; // [HGM] make sure something is returned, for gathering parsed text } \([0-9]+:[0-9][0-9](\.[0-9]+)?\)|\{[0-9]+:[0-9][0-9](\.[0-9]+)?\} { @@ -960,24 +961,24 @@ extern void CopyBoard P((Board to, Board from)); return (int) Comment; } -\([^()]*(\([^()]*(\([^()]*(\([^()]*\)[^()]*)*\)[^()]*)*\)[^()]*)+[^()]*\) { /* very 
nested () */ - return (int) Comment; +\( { /* Opening parentheses */ + return (int) Open; } -\([^)][^)]+\) { /* >=2 chars in () */ - return (int) Comment; +\) { /* closing parentheses */ + return (int) Close; } ^[-a-zA-Z0-9]+:" ".*(\n[ \t]+.*)* { - /* Skip mail headers */ + return (int) Nothing; /* Skip mail headers */ } [a-zA-Z0-9'-]+ { - /* Skip random words */ + return (int) Nothing; /* Skip random words */ } .|\n { - /* Skip everything else */ + return (int) Nothing; /* Skip everything else */ } %% @@ -1158,7 +1159,7 @@ ChessMove yylexstr(boardIndex, s, text, len) yy_switch_to_buffer(buffer); #endif /*FLEX_SCANNER*/ - ret = (ChessMove) yylex(); + ret = (ChessMove) Myylex(); strncpy(text, yy_text, len-1); // [HGM] vari: yy_text is not available to caller after buffer switch ?!? text[len-1] = NULLCHAR; @@ -1171,3 +1172,23 @@ ChessMove yylexstr(boardIndex, s, text, len) return ret; } + +int Myylex() +{ // [HGM] wrapper for yylex, which treats nesting of parentheses + int symbol, nestingLevel = 0, i=0; + char *p; + static char buf[256*MSG_SIZ]; + buf[0] = NULLCHAR; + do { // eat away anything not at level 0 + symbol = yylex(); + if(symbol == Open) nestingLevel++; + if(nestingLevel) { // save all parsed text between (and including) the () + for(p=yytext; *p && i<256*MSG_SIZ-2;) buf[i++] = *p++; + buf[i] = NULLCHAR; + } + if(symbol == 0) break; // ran into EOF + if(symbol == Close) symbol = Comment, nestingLevel--; + } while(nestingLevel || symbol == Nothing); + yy_text = buf[0] ? buf : (char*)yytext; + return symbol; +} diff --git a/pgntags.c b/pgntags.c index 82dfc1d..202b0d9 100644 --- a/pgntags.c +++ b/pgntags.c @@ -250,7 +250,7 @@ ReplaceTags(tags, gameInfo) yynewstr(tags); for (;;) { yyboardindex = 0; - moveType = (ChessMove) yylex(); + moveType = (ChessMove) Myylex(); if (moveType == (ChessMove) 0) { break; } else if (moveType == PGNTag) { -- 1.7.0.4