/* ================================================================= *\

   biblook -- look up references in a bibindexed BibTeX file

   This program was specifically developed for use with the
   computational geometry bibliographic database.  The database
   can be obtained by anonymous ftp from cs.usask.ca in the file
   `pub/geometry/geombib.tar.Z'.

   Version 1.0 written by Jeff Erickson, 27 Mar 92
   Version 2.0 written by Jeff Erickson, 17 Jun 92

   This program is in the public domain.  You may use it or modify
   it to your heart's content, at your own risk.  Bouquets, brickbats,
   and bug fixes may be sent to Jeff Erickson, jeffe@cs.berkeley.edu.

   %Make% gcc -O -o biblook biblook.c

   Usage: biblook bibfile [savefile]

   -----------------------------------------------------------------

   HOW IT WORKS:

   The user can enter any of the following commands:

   f[ind] [not] <field> <words>
        Find the entries containing the given words in any field
        with a prefix matching the <field> argument.  For example,
        `a' matches both `author' and `address', and `au' matches
        `author' only.  If the <field> argument is `-' (or any
        string with no letters or numbers), match any field.

        If `not' appears before the <field>, the sense of the search
        is reversed.  The symbols `~' and `!' can be used in place
        of `not'.

        Each word is a contiguous sequence of letters and digits.
        Case is ignored; accents should be omitted; apostrophes are
        not required.  Single characters and a few common words are
        also ignored.  Any word ending with an asterisk is treated
        as a prefix.  Thus, `point*' matches `point', `points',
        `pointer', etc.

   and [not] <field> <words>
   or [not] <field> <words>
        Intersect (resp. union) the results of the given search
        with the previous search.  Several of these commands may be
        combined on a single line.  Commands are handled in the
        order in which they appear; there is no precedence.  Unlike
        other commands, and like `not', these must be spelled out
        completely.  `&' can be used in place of `and', and `|' can
        be used in place of `or'.

   d[isplay]
        Display the results of the previous search.

   s[ave] [<filename>]
        Save the results of the previous search into the specified
        file.  If <filename> is omitted, the previous save file is
        used.  If no save file has ever been specified, results are
        saved in the file specified on the command line.  If no such
        file is specified, `save.bib' is used.  If the save file
        exists, results are appended to it.

   q[uit]/EOF
        Quit.

   Several commands can be combined on a single line by separating
   them with semicolons.  For example, the following command displays
   all STOC papers cowritten by Erdo"s without `Voronoi diagrams' in
   the title:

   f b stoc* | b symp* theory comp* & au erdos & ~t voronoi diagrams ; d

   -----------------------------------------------------------------
   Version history

   1.0  3/29/92   Initial version complete
   1.1  4/3/92    Fixed GetToken bug.
                  Prompts and feedback messages sent to stderr
                  instead of stdout, so results can be
                  redirected to a file.
   2.0  6/17/92   Major change in file format and commands.
                  1. Allow searching on any field or all fields.
                  2. More extensive boolean queries (and, or, not)
                  3. New command to save results to a file
                  4. New command to display results, rather than
                     displaying them automatically.
                  5. Allow searching for prefixes
                  6. Pipe display results through $PAGER or
                     /usr/ucb/more
   2.1  7/8/92    Minor bug fixes.
   2.3  Bill Jones  93/01/29
                  1. Declarations common to bibindex.c and biblook.c
                     factored out to new file biblook.h.
                  2. Index type of (signed) short overflows early;
                     created typedef Index_t, defined as unsigned
                     short.
   2.4  Nelson H. F. Beebe  [01-Jun-1993]
                  1. Remove some mixed-mode arithmetic.
                  2. Add cast to return value of fork().
                  3. Correct use and type of numoffsets so that code
                     works if Index_t is "unsigned int" or
                     "unsigned long".

\* ================================================================= */ #include "biblook.h" /* ======================= UTILITY FUNCTIONS ======================= */ /* ----------------------------------------------------------------- *\ | void die(const char *msg1, const char *msg2) -- print an error message and die \* ----------------------------------------------------------------- */ void die(const char *msg1, const char *msg2) { fprintf(stderr, "Error: %s %s\n", msg1, msg2); exit(1); } /* ----------------------------------------------------------------- *\ | void safefread(void *ptr, size_t size, size_t num, FILE *fp) | | Read from the file, but die if there's an error. \* ----------------------------------------------------------------- */ void safefread(void *ptr, size_t size, size_t num, FILE *fp) { if (fread(ptr, size, num, fp) < num) die("Unexpected EOF in bix file.", ""); } /* ----------------------------------------------------------------- *\ | char safegetc(FILE *fp) | | Get the next character safely. Used by routines that assume that | they won't run into the end of file. 
\* ----------------------------------------------------------------- */ char safegetc(FILE *fp) { if (feof(fp)) die("Unexpected EOF in bib file.", ""); return getc(fp); } /* ========================== INDEX TABLES ========================= */ typedef struct { Word theword; Index_t numindex; Index_t *index; } Index, *IndexPtr; typedef struct { Word thefield; Index_t numwords; IndexPtr words; } IndexTable; char numfields; IndexTable *fieldtable; Index_t numoffsets; long *offsets; /* ----------------------------------------------------------------- *\ | void ReadWord(FILE *ifp, Word word) | | Read a "pascal" string into the given buffer \* ----------------------------------------------------------------- */ void ReadWord(FILE *ifp, Word word) { char length; safefread((void *) &length, sizeof(char), 1, ifp); if (length > MAXWORD) die("Index file is corrupt", "(word too long)."); safefread((void *) word, sizeof(char), length, ifp); word[length] = 0; } /* ----------------------------------------------------------------- *\ | void GetOneTable(FILE *ifp, IndexTable *table) | | Get one index table from the file \* ----------------------------------------------------------------- */ void GetOneTable(FILE *ifp, register IndexTable *table) { Index_t i, num; safefread((void *) &table->numwords, sizeof(Index_t), 1, ifp); table->words = (IndexPtr) malloc(table->numwords * sizeof(Index)); for (i=0; inumwords; i++) { ReadWord(ifp, table->words[i].theword); safefread((void *) &num, sizeof(Index_t), 1, ifp); table->words[i].numindex = num; table->words[i].index = (Index_t *) malloc(num * sizeof(Index_t)); safefread((void *) table->words[i].index, sizeof(Index_t), num, ifp); } } /* ----------------------------------------------------------------- *\ | void GetTables(char *filename) | | Get the tables from the index file. 
\* ----------------------------------------------------------------- */ void GetTables(char *filename) { int version, i; FILE *ifp; ifp = fopen(filename, "r"); if (!ifp) die("Can't read", filename); if (fscanf(ifp, "bibindex %d %*[^\n]%*c", &version) < 1) die(filename, "is not a bibindex file!"); if (version != FILE_VERSION) die(filename, "is the wrong version. Please rerun bibindex."); safefread((void *) &numoffsets, sizeof(Index_t), 1, ifp); offsets = (long *) malloc(numoffsets * sizeof(long)); safefread((void *) offsets, sizeof(long), numoffsets, ifp); safefread((void *) &numfields, sizeof(char), 1, ifp); fieldtable = (IndexTable *) malloc(numfields * sizeof(IndexTable)); for (i=0; i=lo) { mid = (hi+lo)/2; cmp = strcmp(word, words[mid].theword); if (cmp == 0) return mid; else if (cmp < 0) hi = mid-1; else if (cmp > 0) lo = mid+1; } if (prefix && !strncmp(word, words[lo].theword, strlen(word))) return lo; else return INDEX_NAN; } /* =================== SET MANIPULATION ROUTINES =================== */ #define SETSCALE (sizeof(unsigned long)*8) static Index_t setsize; static unsigned long setmask; /* used to erase extra bits */ typedef unsigned long *Set; /* ----------------------------------------------------------------- *\ | Set NewSet(void) | | Get a new variable to hold sets of integers in the range | [0, numoffsets]. Set setsize and setmask. \* ----------------------------------------------------------------- */ Set NewSet(VOID) { setsize = (numoffsets + SETSCALE - 1)/SETSCALE; /* HACK */ setmask = (1<<(numoffsets%SETSCALE)) - 1; /* KLUDGE */ return (Set) malloc(setsize * SETSCALE); } /* ----------------------------------------------------------------- *\ | void EmptySet(Set theset) | | Empty the set. 
\* ----------------------------------------------------------------- */ void EmptySet(Set theset) { register Index_t i; for (i=0; i= (int)numoffsets) /* extra bits might be set */ return; putc('\n', ofp); if (fseek(bibfp, offsets[entry], 0)) die("Index file is corrupt.", ""); ch = safegetc(bibfp); while (ch != '@') { putc(ch, ofp); ch = safegetc(bibfp); } while ((ch != '{') && (ch != '(')) { putc(ch, ofp); ch = safegetc(bibfp); } braces = quotes = 0; putc(ch, ofp); ch = safegetc(bibfp); while (braces || quotes || ((ch != '}') && (ch != ')'))) { if (ch == '{') braces++; else if (ch == '}') braces--; else if ((ch == '"') && !braces) quotes = !quotes; putc(ch, ofp); ch = safegetc(bibfp); } putc(ch, ofp); putc('\n', ofp); } /* ----------------------------------------------------------------- *\ | void PrintResults(char *filename) | | Print the current search results into the given file. If the | filename is NULL, pipe the output through $PAGER. \* ----------------------------------------------------------------- */ void PrintResults(char *filename) { int numresults; FILE *ofp; char *pager; char *the_tmpfile = (char*)NULL; int childpid; numresults = CountSet(results); if (numresults == 0) printf("\tNothing to display!\n"); else if (numresults == numoffsets) printf("\tI can't display the entire bibliography!\n"); else { if (filename) { ofp = fopen(filename, "a"); if (!ofp) { printf("\tCan't write to \"%s\"!\n", filename); return; } } else { the_tmpfile = (char*)tempnam(NULL, "bibl."); ofp = fopen(the_tmpfile, "w"); if (!ofp) { printf("\tCan't write to \"%s\"!\n", the_tmpfile); return; } } if (filename) { time_t now = time(0); fprintf(ofp, "%% Retrieved by biblook %d.%d at %s", MAJOR_VERSION, MINOR_VERSION, ctime(&now)); } DoForSet(results, (void (*)(int, void *)) PrintEntry, (void *) ofp); fclose(ofp); if (filename) printf("\tResults saved in \"%s\"\n", filename); else { pager = (char*)getenv("PAGER"); if ((childpid = (int)fork()) != 0) waitpid(childpid, (int *) 0, 0); 
else if (pager) { execlp(pager, pager, the_tmpfile, (char *) 0); perror(pager); /* should never get here! */ exit(0); } else { execl("/usr/ucb/more", "more", the_tmpfile, (char *) 0); perror("/usr/ucb/more"); exit(0); } unlink(the_tmpfile); free(the_tmpfile); /* malloc'ed by tempnam() */ putchar('\n'); } } } /* ======================== USER INTERFACE ========================= */ typedef enum { T_Find, T_Display, T_Save, T_Quit, T_Word, T_And, T_Or, T_Not, T_Semi, T_Return, T_Help } Token; /* ----------------------------------------------------------------- *\ | Token GetToken(char *tokenstr) | | Get the next input token. \* ----------------------------------------------------------------- */ Token GetToken(char *tokenstr) { static char line[256]; static short pos; static char neednew = 1; short tlen = 0; *tokenstr = 0; if (neednew) { printf("biblook: "); if (!fgets(line, 254, stdin)) return T_Quit; pos = 0; neednew = 0; } while ((line[pos] == ' ') || (line[pos] == '\t')) pos++; switch (line[pos]) { case '\n': pos++; neednew = 1; return T_Return; case '&': pos++; return T_And; case '|': pos++; return T_Or; case '~': case '!': pos++; return T_Not; case ';': pos++; return T_Semi; default: tokenstr[tlen++] = tolower(line[pos++]); while (!isspace(line[pos]) && (line[pos] != ';') && (line[pos] != '&') && (line[pos] != '|')) { tokenstr[tlen++] = tolower(line[pos++]); } tokenstr[tlen] = 0; /* I really ought to use a hash table here. 
*/ if (!strncmp(tokenstr, "find", tlen)) return T_Find; else if (!strncmp(tokenstr, "display", tlen)) return T_Display; else if (!strncmp(tokenstr, "help", tlen)) return T_Help; else if (!strncmp(tokenstr, "?", tlen)) return T_Help; else if (!strncmp(tokenstr, "save", tlen)) return T_Save; else if (!strncmp(tokenstr, "quit", tlen)) return T_Quit; else if (!strcmp(tokenstr, "and")) return T_And; else if (!strcmp(tokenstr, "or")) return T_Or; else if (!strcmp(tokenstr, "not")) return T_Not; else return T_Word; } } /* ----------------------------------------------------------------- *\ | char Strip(char *string) | | Strip all but alphanumeric characters out of the string. Return | true if the original string ended with the prefix character '*'. \* ----------------------------------------------------------------- */ char Strip(char *string) { char prefix = 0; char *src = string; while (*src) { prefix = (*src == '*'); if (isalnum(*src)) *string++ = *src; src++; } *string = 0; return prefix; } /* ----------------------------------------------------------------- *\ | void CmdError(void) | | Print syntax error message \* ----------------------------------------------------------------- */ void CmdError(VOID) { printf("?? Syntax error ??\n"); } static const char* helplines[] = { "biblook permits rapid lookup in a BibTeX bibliography data", "base, using a compact binary index file prepared by bibindex(1).", "", "Available commands:", "? or h[elp]", " Display this help message.", "", "f[ind] [not] ", " Find the entries containing the given words in any", " field with a prefix matching the argument. For", " example, `a' matches both `author' and `address', and", " `au' matches `author' only. If the argument is", " `-' (or any string with no letters or numbers), match", " any field.", "", " If `not' appears before the , the sense of the", " search is reversed. The symbols `~' and `!' 
can be", " used in place of `not'.", "", " Each word is a contiguous sequence of letters and", " digits. Case is ignored; accents should be omitted;", " apostrophes are not required. Single characters and a", " few common words are also ignored. Any word ending", " with an asterisk is treated as a prefix. Thus,", " `point*' matches `point', `points', `pointer', etc.", "", "and [not] ", "or [not] ", " Intersect (resp. union) the results of the given search", " with the previous search. Several of these commands", " may be combined on a single line. Commands are handled", " in the order in which they appear; there is no pre-", " cedence. Unlike other commands, and like `not', these", " must be spelled out completely. `&' can be used in", " place of `and', and `|' can be used in place of `or'.", "", "d[isplay]", " Display the results of the previous search.", "", "s[ave] []", " Save the results of the previous results into the", " specified file. If is omitted, the previous", " save file is used. If no save file has ever been", " specified, results are saved in the file specified on", " the command line. If no such file is specified,", " `save.bib' is used. If the save file exists, results", " are appended to it.", "", "q[uit]/EOF", " Quit.", "", "Several commands can be combined on a single line by", "separating them with semicolons. For example, the following", "command displays all STOC papers cowritten by Erdo\"s", "without `Voronoi diagrams' in the title:", "", "f b stoc* | b symp* theory comp* & au erdos & ~t voronoi diagrams ; d", "", (const char*)NULL, }; /* ----------------------------------------------------------------- *\ | void GiveHelp(void) | | Print a help message. Lines are stored as separate strings to | avoid hitting compiler limits. 
\* ----------------------------------------------------------------- */ void GiveHelp(VOID) { int k; for (k = 0; helplines[k]; ++k) printf("\t%s\n",helplines[k]); } /* ----------------------------------------------------------------- *\ | States for Lookup() \* ----------------------------------------------------------------- */ typedef enum { Wait, Find, FindN, FindF, FindW, Display, Save, SaveF, Error } CmdState; /* ----------------------------------------------------------------- *\ | void Lookup(const char *defsave) | | Execute commands until the user quits. Defsave is the default | save file name. This is one big finite state machine. It's long | and boring, but that's interface code for ya! \* ----------------------------------------------------------------- */ void Lookup(const char *defsave) { char tokenstr[256]; char savefile[256]; CmdState state = Wait; Token thetoken; char intersect = 1; /* 1 = intersect, 0 = union */ char invert = 0; /* 1 = invert */ char prefix; /* 1 = word is really a prefix */ ClearResults(); strcpy(savefile, defsave); while (1) { thetoken = GetToken(tokenstr); if ((thetoken == T_Quit) && !tokenstr[0]) return; else if (thetoken == T_Help) { GiveHelp(); continue; } switch (state) { case Wait: switch (thetoken) { case T_Quit: return; case T_Find: state = Find; ClearResults(); break; case T_And: state = Find; SaveResults(); break; case T_Or: state = Find; intersect = 0; SaveResults(); break; case T_Display: state = Display; break; case T_Save: state = Save; break; case T_Return: case T_Semi: break; default: state = Error; CmdError(); break; } break; case Find: if (thetoken == T_Not) { state = FindN; invert = 1; } else { if (tokenstr[0]) { state = FindF; Strip(tokenstr); if (!SetUpField(tokenstr)) state = Error; } else { state = (thetoken == T_Return) ? Wait : Error; CmdError(); } } break; case FindN: if (tokenstr[0]) { state = FindF; Strip(tokenstr); if (!SetUpField(tokenstr)) state = Error; } else { state = (thetoken == T_Return) ? 
Wait : Error; CmdError(); } break; case FindF: if (tokenstr[0]) { state = FindW; prefix = Strip(tokenstr); FindWord(tokenstr, prefix); } else { state = (thetoken == T_Return) ? Wait : Error; CmdError(); } break; case FindW: switch (thetoken) { case T_And: state = Find; CombineResults(invert, intersect); SaveResults(); invert = 0; intersect = 1; break; case T_Or: state = Find; CombineResults(invert, intersect); SaveResults(); invert = 0; intersect = 0; break; case T_Semi: state = Wait; CombineResults(invert, intersect); invert = 0; intersect = 1; break; case T_Return: state = Wait; CombineResults(invert, intersect); ReportResults(); invert = 0; intersect = 1; break; default: if (tokenstr[0]) { state = FindW; prefix = Strip(tokenstr); FindWord(tokenstr, prefix); } else { state = Error; CmdError(); } break; } break; case Display: if ((thetoken == T_Semi) || (thetoken == T_Return)) { state = Wait; PrintResults(NULL); } else { state = Error; CmdError(); } break; case Save: if (tokenstr[0]) { state = SaveF; strcpy(savefile, tokenstr); } else if ((thetoken == T_Semi) || (thetoken == T_Return)) { state = Wait; PrintResults(savefile); } else { state = Error; CmdError(); } break; case SaveF: if ((thetoken == T_Semi) || (thetoken == T_Return)) { state = Wait; PrintResults(savefile); } else { state = Error; CmdError(); } break; case Error: switch (thetoken) { case T_Quit: return; case T_Return: state = Wait; break; default: break; } break; } } } /* ================================================================= *\ | The main program \* ================================================================= */ int main(int argc, char **argv) { char bibfile[256]; char bixfile[256]; struct stat bibstat, bixstat; char *p; printf("biblook version %d.%d file version %d\n", (int)MAJOR_VERSION, (int)MINOR_VERSION, (int)FILE_VERSION); printf("Type ? 
or h for help\n"); if ((argc != 2) && (argc != 3)) { fprintf(stderr, "Usage: biblook bib [savefile]\n"); exit(1); } if (((p = strrchr(argv[1],'.')) != (char*)NULL) && (strcmp(p, ".bib") == 0)) *p = '\0'; /* remove any .bib extension */ sprintf(bibfile, "%s.bib", argv[1]); sprintf(bixfile, "%s.bix", argv[1]); stat(bibfile, &bibstat); stat(bixfile, &bixstat); if (bibstat.st_mtime > bixstat.st_mtime) die(bixfile, "is out of date. Please rerun bibindex."); bibfp = fopen(bibfile, "r"); if (!bibfp) die("Can't read", bibfile); GetTables(bixfile); InitSearch(); if (argc == 3) Lookup(argv[2]); else Lookup("save.bib"); FreeSearch(); FreeTables(); fclose(bibfp); exit(0); return(0); }