X-Git-Url: http://winboard.nl/cgi-bin?a=blobdiff_plain;f=parser.c;h=481e7b241c47304398c21697f5f86b2ed8ae512d;hb=e6e38912837a4fe2464356408d10dee950b3121c;hp=09bb390b54548591b8626fef39ac15f85b54f538;hpb=12fa48d7b282c15a16eefc5bdc246daa25ae4dfe;p=xboard.git diff --git a/parser.c b/parser.c index 09bb390..481e7b2 100644 --- a/parser.c +++ b/parser.c @@ -1,13 +1,24 @@ -// New PGN parser by by HGM. I was dissatisfied with the old flex-generated parser for several reasons: -// 1) It required flex to build -// 2) It was not possible to use variant-dependent syntax, which gave trouble for '+' as Sogi promoChar vs check symbol -// 3) It could not handle double-digit rank numbers -// 4) It could not handle PSN moves, with (alpha rank and file digit) -// 5) Having more than 12 ranks would require extension of the rules anyway -// 6) It was cumbersome to maintain, which much code duplication that had to be kept in sync when changing something -// 7) It needed special handling for packaging, because we wanted to include parser.c for people who had no flex -// 8) It was quite large because of the table-driven flex algorithm. -// This new parser suffers from none of that. It might even accomodate traditional Xiangqi notation at some future time. +/* + * parser.c -- + * + * Copyright 2011, 2012, 2013, 2014 Free Software Foundation, Inc. + * ------------------------------------------------------------------------ + * + * GNU XBoard is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or (at + * your option) any later version. + * + * GNU XBoard is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see http://www.gnu.org/licenses/. * + * + *------------------------------------------------------------------------ + ** See the file ChangeLog for a revision history. */ #include "config.h" #include @@ -40,7 +51,8 @@ static char fromString = 0, lastChar = '\n'; #define ALPHABETIC 2 #define BADNUMBER (-2000000000) -int ReadLine() +int +ReadLine () { // Read one line from the input file, and append to the buffer char c, *start = inPtr; if(fromString) return 0; // parsing string, so the end is a hard end @@ -55,7 +67,8 @@ int ReadLine() return 1; } -int Scan(char c, char **p) +int +Scan (char c, char **p) { // line-spanning skip to mentioned character or EOF do { while(**p) if(*(*p)++ == c) return 0; @@ -64,7 +77,8 @@ int Scan(char c, char **p) return 1; } -int SkipWhite(char **p) +int +SkipWhite (char **p) { // skip spaces tabs and newlines; return 1 if anything was skipped char *start = *p; do{ @@ -73,7 +87,8 @@ int SkipWhite(char **p) return *p != start; } -inline int Match(char *pattern, char **ptr) +inline int +Match (char *pattern, char **ptr) { char *p = pattern, *s = *ptr; while(*p && (*p == *s++ || s[-1] == '\r' && *p--)) p++; @@ -84,7 +99,8 @@ inline int Match(char *pattern, char **ptr) return 0; // no match, no ptr update } -inline int Word(char *pattern, char **p) +inline int +Word (char *pattern, char **p) { if(Match(pattern, p)) return 1; if(*pattern >= 'a' && *pattern <= 'z' && *pattern - **p == 'a' - 'A') { // capitalized @@ -95,15 +111,16 @@ inline int Word(char *pattern, char **p) return 0; } -int Verb(char *pattern, char **p) +int +Verb (char *pattern, char **p) { int res = Word(pattern, p); if(res && !Match("s", p)) Match("ed", p); // eat conjugation suffix, if any return res; } - -int Number(char **p) +int +Number (char **p) { int val = 0; if(**p < '0' || **p > '9') return BADNUMBER; @@ -113,7 +130,8 @@ int Number(char **p) return val; } -int RdTime(char c, char **p) +int +RdTime (char c, char **p) { char *start = ++(*p), *sec; // increment *p, as it was pointing to the opening ( or { if(Number(p) == BADNUMBER) return 0; @@ -128,23 +146,25 @@ int RdTime(char c, char **p) return 0; } -char PromoSuffix(char **p) +char +PromoSuffix (char **p) { char *start = *p; if(**p == 'e' && (Match("ep", p) || Match("e.p.", p))) { *p = start; return NULLCHAR; } // non-compliant e.p. suffix is no promoChar! - if(**p == '+' && gameInfo.variant == VariantShogi) { (*p)++; return '+'; } - if(**p == '=') (*p)++; //optional = - if(**p == '(' && (*p)[2] == ')' && isalpha( (*p)[1] )) { (*p) += 3; return (*p)[-2]; } - if(isalpha(**p)) return *(*p)++; - if(*p != start) return '='; // must be the optional = + if(**p == '+' && IS_SHOGI(gameInfo.variant)) { (*p)++; return '+'; } + if(**p == '=' || (gameInfo.variant == VariantSChess) && **p == '/') (*p)++; // optional = (or / for Seirawan gating) + if(**p == '(' && (*p)[2] == ')' && isalpha( (*p)[1] )) { (*p) += 3; return ToLower((*p)[-2]); } + if(isalpha(**p) && **p != 'x') return ToLower(*(*p)++); // reserve 'x' for multi-leg captures? + if(*p != start) return **p == '+' ? *(*p)++ : '='; // must be the optional = (or =+) return NULLCHAR; // no suffix detected } -int NextUnit(char **p) +int +NextUnit (char **p) { // Main parser routine int coord[4], n, result, piece, i; char type[4], promoted, separator, slash, *oldp, *commentEnd, c; - int wom = WhiteOnMove(yyboardindex); + int wom = quickFlag ? quickFlag&1 : WhiteOnMove(yyboardindex); // ********* try white first, because it is so common ************************** if(**p == ' ' || **p == '\n' || **p == '\t') { parseStart = (*p)++; return Nothing; } @@ -154,6 +174,10 @@ int NextUnit(char **p) if(fromString) return 0; // we are parsing string, so the end is really the end *p = inPtr = inputBuf; if(!ReadLine()) return 0; // EOF + } else if(inPtr > inputBuf + PARSEBUFSIZE/2) { // buffer fills up with already parsed stuff + char *q = *p, *r = inputBuf; + while(*r++ = *q++); + *p = inputBuf; inPtr = r - 1; } parseStart = oldp = *p; // remember where we begin @@ -200,10 +224,9 @@ int NextUnit(char **p) type[1] = NOTHING; // disambiguator goes in first two positions n = 4; } -if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coord[0], coord[1], coord[2], coord[3], type[0], type[1], type[2], type[3]); // we always get here; move must be completely read now, with to-square coord(s) at end if(n == 3) { // incomplete to-square. Could be Xiangqi traditional, or stuff like fxg - if(piece && type[1] == NOTHING && type[0] == NUMERIC && type[2] == NUMERIC && + if(piece && type[1] == NOTHING && type[0] == NUMERIC && type[2] == NUMERIC && (separator == '+' || separator == '=' || separator == '-')) { // Xiangqi traditional @@ -217,7 +240,7 @@ if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coor } else if(n == 1 && type[0] == NUMERIC && coord[0] > 1) { while(**p == '.') (*p)++; return Nothing; } // fast exit for move numbers if(n == 4 && type[2] != type[3] && // we have a valid to-square (kludge: type[3] can be NOTHING on fxg type move) (piece || !promoted) && // promoted indicator only valid on named piece type - (type[2] == ALPHABETIC || gameInfo.variant == VariantShogi)) { // in Shogi also allow alphabetic rank + (type[2] == ALPHABETIC || IS_SHOGI(gameInfo.variant))) { // in Shogi also allow alphabetic rank DisambiguateClosure cl; int fromX, fromY, toX, toY; @@ -238,7 +261,7 @@ if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coor if(piece) { cl.pieceIn = CharToPiece(wom ? piece : ToLower(piece)); if(cl.pieceIn == EmptySquare) return ImpossibleMove; // non-existent piece - if(promoted) cl.pieceIn = (ChessSquare) (PROMOTED cl.pieceIn); + if(promoted) cl.pieceIn = (ChessSquare) (CHUPROMOTED cl.pieceIn); } else cl.pieceIn = EmptySquare; if(separator == '@' || separator == '*') { // drop move. We only get here without from-square or promoted piece fromY = DROP_RANK; fromX = cl.pieceIn; @@ -256,13 +279,25 @@ if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coor fromY = (currentMoveString[1] = coord[1] + '0') - ONE; currentMoveString[4] = cl.promoCharIn = PromoSuffix(p); currentMoveString[5] = NULLCHAR; + if(!cl.promoCharIn && (**p == '-' || **p == 'x')) { // Lion-type multi-leg move + currentMoveString[5] = (killX = toX) + AAA; // what we thought was to-square is in fact kill-square + currentMoveString[6] = (killY = toY) + ONE; // append it as suffix behind long algebraic move + currentMoveString[4] = ';'; + currentMoveString[7] = NULLCHAR; + // read new to-square (VERY non-robust! Assumes correct (non-alpha-rank) syntax, and messes up on errors) + toX = cl.ftIn = (currentMoveString[2] = *++*p) - AAA; ++*p; + toY = cl.rtIn = (currentMoveString[3] = Number(p) + '0') - ONE; + } if(type[0] != NOTHING && type[1] != NOTHING && type[3] != NOTHING) { // fully specified. + ChessSquare realPiece = boards[yyboardindex][fromY][fromX]; // Note that Disambiguate does not work for illegal moves, but flags them as impossible if(piece) { // check if correct piece indicated - ChessSquare realPiece = boards[yyboardindex][fromY][fromX]; if(PieceToChar(realPiece) == '~') realPiece = (ChessSquare) (DEMOTED realPiece); if(!(appData.icsActive && PieceToChar(realPiece) == '+') && // trust ICS if it moves promoted pieces piece && realPiece != cl.pieceIn) return ImpossibleMove; + } else if(!separator && **p == '+') { // could be a protocol move, where bare '+' suffix means shogi-style promotion + if(realPiece < (wom ? WhiteCannon : BlackCannon) && PieceToChar(PROMOTED realPiece) == '+') // seems to be that + currentMoveString[4] = cl.promoCharIn = *(*p)++; // append promochar after all } result = LegalityTest(boards[yyboardindex], PosFlags(yyboardindex), fromY, fromX, toY, toX, cl.promoCharIn); if (currentMoveString[4] == NULLCHAR) { // suppy missing mandatory promotion character @@ -298,6 +333,7 @@ if(appData.debugMode)fprintf(debugFP, "trial %d,%d,%d,%d type %d%d%d%d\n", coor currentMoveString[0] = cl.ff + AAA; currentMoveString[1] = cl.rf + ONE; currentMoveString[3] = cl.rt + ONE; + if(killX < 0) // [HGM] lion: do not overwrite kill-square suffix currentMoveString[4] = cl.promoChar; if((cl.kind == WhiteCapturesEnPassant || cl.kind == BlackCapturesEnPassant) && (Match("ep", p) || Match("e.p.", p))); @@ -315,7 +351,7 @@ badMove:// we failed to find algebraic move if(**p == '[') { oldp = ++(*p); if(Match("--", p)) { // "[--" could be start of position diagram - if(!Scan(']', p) && (*p)[-3] == '-' && (*p)[-2] == '-') return PositionDiagram; + if(!Scan(']', p) && (*p)[-3] == '-' && (*p)[-2] == '-') return PositionDiagram; *p = oldp; } SkipWhite(p); @@ -323,7 +359,8 @@ badMove:// we failed to find algebraic move do (*p)++; while(isdigit(**p) || isalpha(**p) || **p == '+' || **p == '-' || **p == '=' || **p == '_' || **p == '#'); SkipWhite(p); - if(*(*p)++ == '"') { + if(**p == '"') { + (*p)++; while(**p != '\n' && (*(*p)++ != '"'|| (*p)[-2] == '\\')); // look for unescaped quote if((*p)[-1] !='"') { *p = oldp; Scan(']', p); return Comment; } // string closing delimiter missing SkipWhite(p); if(*(*p)++ == ']') return PGNTag; @@ -333,14 +370,17 @@ badMove:// we failed to find algebraic move } // ********* SAN Castings ************************************* - if(**p == 'O' || **p == 'o' || **p == '0') { + if(**p == 'O' || **p == 'o' || **p == '0' && !Match("00:", p)) { // exclude 00 in time stamps int castlingType = 0; - if(Match("O-O-O", p) || Match("o-o-o", p) || Match("0-0-0", p) || + if(Match("O-O-O", p) || Match("o-o-o", p) || Match("0-0-0", p) || Match("OOO", p) || Match("ooo", p) || Match("000", p)) castlingType = 2; else if(Match("O-O", p) || Match("o-o", p) || Match("0-0", p) || Match("OO", p) || Match("oo", p) || Match("00", p)) castlingType = 1; if(castlingType) { //code from old parser, collapsed for both castling types, and streamlined a bit int rf, ff, rt, ft; ChessSquare king; + char promo=NULLCHAR; + + if(gameInfo.variant == VariantSChess) promo = PromoSuffix(p); if (yyskipmoves) return (int) AmbiguousMove; /* not disambiguated */ @@ -356,7 +396,7 @@ badMove:// we failed to find algebraic move ff = (BOARD_WIDTH-1)>>1; // this would be d-file if (boards[yyboardindex][rf][ff] == king) { /* ICS wild castling */ - ft = castlingType == 1 ? BOARD_LEFT+1 : BOARD_RGHT-3; + ft = castlingType == 1 ? BOARD_LEFT+1 : (gameInfo.variant == VariantJanus ? BOARD_RGHT-2 : BOARD_RGHT-3); } else { ff = BOARD_WIDTH>>1; // e-file ft = castlingType == 1 ? BOARD_RGHT-2 : BOARD_LEFT+2; @@ -372,12 +412,12 @@ badMove:// we failed to find algebraic move if (appData.debugMode) fprintf(debugFP, "Parser FRC (type=%d) %d %d\n", castlingType, ff, ft); if(ff == NoRights || ft == NoRights) return ImpossibleMove; } - sprintf(currentMoveString, "%c%c%c%c",ff+AAA,rf+ONE,ft+AAA,rt+ONE); + sprintf(currentMoveString, "%c%c%c%c%c",ff+AAA,rf+ONE,ft+AAA,rt+ONE,promo); if (appData.debugMode) fprintf(debugFP, "(%d-type) castling %d %d\n", castlingType, ff, ft); return (int) LegalityTest(boards[yyboardindex], PosFlags(yyboardindex)&~F_MANDATORY_CAPTURE, // [HGM] losers: e.p.! - rf, ff, rt, ft, NULLCHAR); + rf, ff, rt, ft, promo); } } @@ -509,6 +549,8 @@ badMove:// we failed to find algebraic move return Nothing; } + // ********* Prevent 00 in unprotected time stamps to be mistaken for castling ******* + if(Match(":00", p)) return Nothing; // ********* Could not match to anything. Return offending character **** (*p)++; @@ -518,12 +560,14 @@ badMove:// we failed to find algebraic move /* Return offset of next pattern in the current file. */ -int yyoffset() +int +yyoffset () { return ftell(inputFile) - (inPtr - parsePtr); // subtract what is read but not yet parsed } -void yynewfile (FILE *f) +void +yynewfile (FILE *f) { // prepare parse buffer for reading file inputFile = f; inPtr = parsePtr = inputBuf; @@ -532,14 +576,16 @@ void yynewfile (FILE *f) *inPtr = NULLCHAR; // make sure we will start by reading a line } -void yynewstr P((char *s)) +void +yynewstr P((char *s)) { parsePtr = s; inputFile = NULL; fromString = 1; } -int yylex() +int +yylex () { // this replaces the flex-generated parser int result = NextUnit(&parsePtr); char *p = parseStart, *q = yytext; @@ -549,7 +595,8 @@ int yylex() return result; } -int Myylex() +int +Myylex () { // [HGM] wrapper for yylex, which treats nesting of parentheses int symbol, nestingLevel = 0, i=0; char *p; @@ -569,7 +616,8 @@ int Myylex() return symbol; } -ChessMove yylexstr(int boardIndex, char *s, char *buf, int buflen) +ChessMove +yylexstr (int boardIndex, char *s, char *buf, int buflen) { ChessMove ret; char *savPP = parsePtr; @@ -583,4 +631,3 @@ ChessMove yylexstr(int boardIndex, char *s, char *buf, int buflen) fromString = 0; return ret; } -