/* experimental CGI interface to ^z's FreeText indexer/browser "zftir" */

/* next few lines contain global variables and #defines which may need to
 * be customized or changed frequently */

float zftir_version = 0.015;
long zftir_date = 19970709;
#define DB_DIRECTORY "/ftp.whatever.directory.goes.here/"

/* ZFTIR (c) 1987-1997 Mark Zimmermann --- free software under the GNU GPL!
 *
 * CONCEPT:  make a Web browser interface to classic FreeText IR files
 *   to enable "real-time high-bandwidth large-scale free-text
 *   information retrieval" for people anywhere on the WWW!
 *
 * see http://www.alumni.caltech.edu/~zimm/freetext.html
 *   for philosophical background; see also my chapter in
 *   THE DIGITAL WORD: TEXT-BASED COMPUTING IN THE HUMANITIES
 *   (eds. Landow & Delany, MIT Press, 1993)
 *
 * preliminary experiments - do a few of the necessary functions
 *  to permit (in)famous FreeText-style browsing --- specifically:
 *    INDEX - window into list of words and their frequencies
 *    CONTEXT - key-word-in-context instances of every word
 *    TEXT - full text from the database
 *    SEEK - jump INDEX display to a chosen word
 *
 * Long-term goals/fantasies:
 *  - extension language (Scheme, e.g. SIOD or GUILE?) for runtime customization
 *  - proximity search & fuzzy-neighborhood retrieval
 *  - multi-file databases
 *  - alternative alphabetizations & character mappings
 *  - relevance-ranked retrieval
 *  - thesaurus-like Index-word-mapping for synonyms/equivalents
 *  - grep-like wild-card search (like agrep, glimpse, etc.?)
 *  - up/down scan to target string in Text View
 *  - fractal space-filling graphical display of Index View
 *  - VRML data visualization - fly around in a dataspace?
 *  - correlation & similarity computations --- auto-identify characteristic words
 *  - locally-running versions (Java?!) for non-networked use
 */

/* NOTE(review): in the copy this file was reviewed from, every <...> span
 * appears to have been stripped --- the #include header names and, presumably,
 * the HTML tags inside the printf() literals below.  The first three headers
 * are the ones the code demonstrably needs; <limits.h> is added for the
 * overflow checks.  The string literals are kept as found (with their line
 * breaks written as \n); restore the markup from a pristine copy. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <limits.h>
/* #include (a fourth, commented-out header whose name was lost) */

/* defines and structures */

#define KEYLENGTH 28
#define STRLEN 500
#define DEFAULT_TEXT_CHUNK 100

typedef struct {
  char kkey[KEYLENGTH];
  long ccount;
} KEYREC;

/* global variables */

char db_name[STRLEN] = "";
char target_word[STRLEN] = "";
char proximity_search[STRLEN] = "";
char script_name[STRLEN] = "";
char query_string[STRLEN] = "";
char doc_filename[STRLEN], key_filename[STRLEN], ptr_filename[STRLEN];
FILE *doc_file, *key_file, *ptr_file;
long index_lines = 40;
long index_start = 0;
long context_lines = 40;
long context_start = 0;
long context_width = 72;
long context_offset = 30;
long context_jump = 0;
long text_length = 8000;
long text_start = 0;
long text_jump = 0;
int interpret_html = 0;

/* prototypes */

/* top level functions */
int main(int argc, char *argv[]);
void open_files(void);
int parse_query(int argc, char *argv[]);
void error_page(int argc, char *argv[]);
void header_html(void);
void db_form(void);
void index_html(void);
void context_html(void);
void text_html(void);
void param_form(void);
void zinfo_html(void);
void close_files(void);

/* lower level functions */
void getkeyrec(KEYREC *recp, long n, FILE *keyfile, long max);
long getptrrec(long ptrnum, FILE *ptrfile);
char *set_db_name(char *qsp);
char *set_target_word(char *qsp);
char *set_index_lines(char *qsp);
char *set_index_start(char *qsp);
char *set_context_lines(char *qsp);
char *set_context_start(char *qsp);
char *set_context_width(char *qsp);
char *set_context_offset(char *qsp);
char *set_text_length(char *qsp);
char *set_text_start(char *qsp);
char *set_interpret_html(char *qsp);
char *set_context_jump(char *qsp);
char *set_text_jump(char *qsp);
long seek_word(void);

/* private helpers */
static void print_escaped(const char *s);
static void print_env_line(const char *name);
static void fatal_before_header(const char *message, const char *detail);
static int has_parent_component(const char *path);
static char *copy_string_param(char *qsp, char *dest, size_t destsize);
static char *parse_long_param(char *qsp, long *out, int allow_sign);


/* ***** main program ***** */

/* Parse the CGI request; on success emit the page sections in order, otherwise
 * emit the diagnostic page (error_page() exits with status 1 itself). */
int main(int argc, char *argv[])
{
  if (parse_query(argc, argv)) {
    open_files();
    header_html();
    db_form();
    index_html();
    context_html();
    text_html();
    param_form();
    zinfo_html();
    close_files();
  }
  else
    error_page(argc, argv);

  return(0);
}


/* read and set the parameters for the global variables; return 1 if (apparently)
 * successful, 0 if error detected ... also set other parameters here as needed
 * for HTML generation
 *
 * Every environment value is length-checked before it is copied into one of
 * the fixed STRLEN buffers; an over-long value is treated as an error. */
int parse_query(int argc, char *argv[])
{
  const char *server_name = getenv("SERVER_NAME");
  const char *script_path = getenv("SCRIPT_NAME");
  const char *query = getenv("QUERY_STRING");
  char *qsp;

  (void)argc;
  (void)argv;

  /* produce self-referential name of this CGI program for use in links */
  strcpy(script_name, "http://");
  if (server_name == NULL
      || strlen(script_name) + strlen(server_name) >= sizeof script_name)
    return(0);
  strcat(script_name, server_name);
  if (script_path == NULL
      || strlen(script_name) + strlen(script_path) >= sizeof script_name)
    return(0);
  strcat(script_name, script_path);

  /* load any parameters in QUERY_STRING into their places */
  if (query == NULL || strlen(query) >= sizeof query_string)
    return(0);
  strcpy(query_string, query);

  qsp = &query_string[0];
  while (*qsp != '\0') {
    /* load in values; each setter returns NULL if a problem is detected */
    switch (*qsp) {
      case 'd': case 'D': qsp = set_db_name(qsp);        break;
      case 'k': case 'K': qsp = set_target_word(qsp);    break;
      case 'j': case 'J': qsp = set_index_lines(qsp);    break;
      case 'i': case 'I': qsp = set_index_start(qsp);    break;
      case 'l': case 'L': qsp = set_context_lines(qsp);  break;
      case 'c': case 'C': qsp = set_context_start(qsp);  break;
      case 'w': case 'W': qsp = set_context_width(qsp);  break;
      case 'o': case 'O': qsp = set_context_offset(qsp); break;
      case 'u': case 'U': qsp = set_text_length(qsp);    break;
      case 't': case 'T': qsp = set_text_start(qsp);     break;
      case 'h': case 'H': qsp = set_interpret_html(qsp); break;
      case 'e': case 'E': qsp = set_context_jump(qsp);   break;
      case 'v': case 'V': qsp = set_text_jump(qsp);      break;
      default:
        return(0);
    }
    if (qsp == NULL)
      return(0);
  }

  if (target_word[0] != '\0')
    index_start = seek_word();

  /* the starts are non-negative, so only a positive jump can overflow */
  if (context_jump > 0 && context_start > LONG_MAX - context_jump)
    return(0);
  context_start += context_jump;
  if (text_jump > 0 && text_start > LONG_MAX - text_jump)
    return(0);
  text_start += text_jump;

  return(1);
}


/* handle any problems detected by returning an error/diagnostic page & exiting
 *
 * Request-derived strings are HTML-escaped before being echoed, and unset
 * environment variables are shown as "(null)" instead of being passed to %s. */
void error_page(int argc, char *argv[])
{
  int i;

  printf("Content-type: text/html\n\n");
  printf("\n");
  printf("\n");
  printf("zftir: Error!\n");   /* was "Error!%s" with no matching argument */
  printf("\n");
  printf("\n");

  printf("\n\nSorry!\n\n\n");
  printf("There has apparently been an error; please contact Mark Zimmermann\n");
  printf("and provide detailed information on how you got here, including the\n");
  printf("following diagnostics as well as background on other actions and symptoms\n");
  printf("which you observed, so that I can better understand and fix any problems.\n");
  printf("\n\n\n");
  printf("Thank you! --- ^z\n");
  printf("\n\n\n\n");

  printf("\n\n");
  printf("zftir version #%f dated %ld\n\n", zftir_version, zftir_date);

  if (argc > 0) {
    printf("Command-line arguments:\n");
    printf("  #    value\n");
    printf(" ---  -------\n");
    for (i = 0; i < argc; ++i) {
      printf("  %d    ", i);
      print_escaped(argv[i]);
      printf("\n");
    }
  }

  printf("\n\n");
  printf("Environment variables:\n");
  print_env_line("SERVER_NAME");
  print_env_line("SERVER_PORT");
  print_env_line("SCRIPT_NAME");
  print_env_line("QUERY_STRING");

  printf("\n\n");
  printf("Internal variables:\n");
  printf("db_name = ");           print_escaped(db_name);           printf("\n");
  printf("target_word = ");       print_escaped(target_word);       printf("\n");
  printf("proximity_search = ");  print_escaped(proximity_search);  printf("\n");
  printf("script_name = ");       print_escaped(script_name);       printf("\n");
  printf("index_lines = %ld\n", index_lines);
  printf("index_start = %ld\n", index_start);
  printf("context_lines = %ld\n", context_lines);
  printf("context_start = %ld\n", context_start);
  printf("context_width = %ld\n", context_width);
  printf("context_offset = %ld\n", context_offset);
  printf("context_jump = %ld\n", context_jump);
  printf("text_length = %ld\n", text_length);
  printf("text_start = %ld\n", text_start);
  printf("text_jump = %ld\n", text_jump);
  printf("interpret_html = %d\n", interpret_html);

  printf("\n\n");
  printf("\n\n");

  zinfo_html();
  exit(1);
}


/* open the database, key, and pointer files
 *
 * Does nothing when no database was selected.  db_name comes straight from
 * the query string, so names containing a ".." path component or too long for
 * the filename buffers are rejected.  Any failure ends the program with
 * status 1; because this runs before header_html(), the message is preceded
 * by its own Content-type header. */
void open_files(void)
{
  if (db_name[0] == '\0')
    return;

  if (has_parent_component(db_name))
    fatal_before_header("invalid database name", db_name);

  /* room is needed for the directory, the name, a 2-char suffix and the NUL */
  if (strlen(DB_DIRECTORY) + strlen(db_name) + 2 >= sizeof doc_filename)
    fatal_before_header("database name too long", db_name);

  strcpy(doc_filename, DB_DIRECTORY);
  strcat(doc_filename, db_name);
  strcpy(key_filename, doc_filename);
  strcat(key_filename, ".k");
  strcpy(ptr_filename, doc_filename);
  strcat(ptr_filename, ".p");

  if ((doc_file = fopen(doc_filename, "rb")) == NULL)
    fatal_before_header("can't open doc_file", doc_filename);
  if ((key_file = fopen(key_filename, "rb")) == NULL)
    fatal_before_header("can't open key_file", key_filename);
  if ((ptr_file = fopen(ptr_filename, "rb")) == NULL)
    fatal_before_header("can't open ptr_file", ptr_filename);
}


/* produce header and title to begin a normal ZFTIR page */
void header_html(void)
{
  printf("Content-type: text/html\n\n");
  printf("\n");
  printf("\n");
  printf("zftir: ");
  print_escaped(db_name);   /* db_name is request data: escape it */
  printf("\n");
  printf("\n");
  printf("\n");

  printf("Zftir free-text information retrieval experiment\n");
  printf("version #%f dated %ld\n\n", zftir_version, zftir_date);
  printf("(c) 1987-1997\n");
  printf("\n");
  printf("Mark Zimmermann \n");

  if (db_name[0] == '\0') {
    printf("\n\n\n");
    printf("Zftir was opened without a database selection --- please choose\n");
    printf("a database on the form below and try again --- thank you!\n");
  }
  printf("\n\n\n\n");
}


/* create the top of the form where the database name is chosen */
void db_form(void)
{
  printf("\n\n");
  printf("\n\n");
}


/* provide contact info, finish off the html page, and we're done */
void zinfo_html(void)
{
  printf("zftir = ^z's free-text information retrieval experiment\n\n");
  printf("version #%f dated %ld\n\n", zftir_version, zftir_date);
  printf("(c) 1987-1997 by Mark Zimmermann\n\n");
  printf("Free software under the GNU GPL.\n\n");
  printf("Thank you! --- ^z\n");
  printf("\n");
  printf("\n");
}


/* close whichever of the three database files are open; the streams are still
 * NULL when open_files() returned early because no database was selected */
void close_files(void)
{
  if (doc_file != NULL) {
    fclose(doc_file);
    doc_file = NULL;
  }
  if (key_file != NULL) {
    fclose(key_file);
    key_file = NULL;
  }
  if (ptr_file != NULL) {
    fclose(ptr_file);
    ptr_file = NULL;
  }
}


/* ***** lower-level functions to fetch and format data follow ***** */

/* get the nth KEYREC
 *
 * For n outside 0..max the record is filled with a blank key and a zero
 * count.  Otherwise record n is read from keyfile; a seek or read failure
 * prints a message and exits with status 1. */
void getkeyrec(KEYREC *recp, long n, FILE *keyfile, long max)
{
  if (n < 0 || n > max) {
    strncpy((char *)recp->kkey, " ", KEYLENGTH);
    recp->ccount = 0;
    return;
  }
  /* refuse offsets that cannot be represented in fseek()'s long argument */
  if (n > LONG_MAX / (long)sizeof(KEYREC)
      || fseek(keyfile, (long)sizeof(KEYREC) * n, SEEK_SET) != 0) {
    printf("ERROR in fseek() getting key record #%ld\n", n);
    exit(1);
  }
  if (fread((char *)recp, sizeof(KEYREC), 1, keyfile) != 1) {
    printf("ERROR in fread() getting key record #%ld\n", n);
    exit(1);
  }
}


/* fetch ptr record # ptrnum from ptrfile
 *
 * Records are read as raw `long` values, so the file is only readable on a
 * machine with the same `long` size and byte order as the one that wrote it.
 * A seek or read failure prints a message and exits with status 1. */
long getptrrec(long ptrnum, FILE *ptrfile)
{
  long p;

  if (ptrnum < 0 || ptrnum > LONG_MAX / (long)sizeof(long)
      || fseek(ptrfile, (long)sizeof(long) * ptrnum, SEEK_SET) != 0) {
    printf("ERROR! in fseek() getting ptr record # %ld!\n", ptrnum);
    exit(1);
  }
  if (fread((char *)&p, sizeof(long), 1, ptrfile) != 1) {
    printf("ERROR! in fread() getting ptr record #%ld!\n", ptrnum);
    exit(1);
  }
  return(p);
}


/* following routines read in the variables and return qsp pointing to the
 * next thing to read, or qsp = NULL if trouble is detected in the input stream
 *
 * On entry qsp points at the one-letter key; the next character must be '='.
 * A value ends at '&' (which is skipped) or at the end of the string. */

char *set_db_name(char *qsp)
{
  return(copy_string_param(qsp, db_name, sizeof db_name));
}

char *set_target_word(char *qsp)
{
  return(copy_string_param(qsp, target_word, sizeof target_word));
}

char *set_index_lines(char *qsp)
{
  return(parse_long_param(qsp, &index_lines, 0));
}

char *set_index_start(char *qsp)
{
  return(parse_long_param(qsp, &index_start, 0));
}

char *set_context_lines(char *qsp)
{
  return(parse_long_param(qsp, &context_lines, 0));
}

char *set_context_start(char *qsp)
{
  return(parse_long_param(qsp, &context_start, 0));
}

char *set_context_width(char *qsp)
{
  return(parse_long_param(qsp, &context_width, 0));
}

char *set_context_offset(char *qsp)
{
  return(parse_long_param(qsp, &context_offset, 0));
}

char *set_text_length(char *qsp)
{
  /* previously stored into context_offset by mistake */
  return(parse_long_param(qsp, &text_length, 0));
}

char *set_text_start(char *qsp)
{
  return(parse_long_param(qsp, &text_start, 0));
}

char *set_interpret_html(char *qsp)
{
  long value = 0;
  char *next = parse_long_param(qsp, &value, 0);

  if (next == NULL || value > INT_MAX)
    return(NULL);
  interpret_html = (int)value;
  return(next);
}

/* the two jump parameters may carry a sign: an optional '-' then an optional '+' */
char *set_context_jump(char *qsp)
{
  return(parse_long_param(qsp, &context_jump, 1));
}

char *set_text_jump(char *qsp)
{
  return(parse_long_param(qsp, &text_jump, 1));
}


/* look up a word in the index and return its number --- SOME DAY!!! ...
 * will have to turn the word to all caps, make sure the file is open, etc.
 * (currently a stub that always returns 0) */
long seek_word(void)
{
  return(0);
}


/* ***** private helpers ***** */

/* write s to stdout with the HTML-significant characters & < > " replaced by
 * character entities, so request data cannot inject markup into the page */
static void print_escaped(const char *s)
{
  for (; *s != '\0'; ++s) {
    switch (*s) {
      case '&':  fputs("&amp;", stdout);  break;
      case '<':  fputs("&lt;", stdout);   break;
      case '>':  fputs("&gt;", stdout);   break;
      case '"':  fputs("&quot;", stdout); break;
      default:   putchar(*s);             break;
    }
  }
}

/* print one `getenv("NAME") = value` diagnostic line; unset -> "(null)" */
static void print_env_line(const char *name)
{
  const char *value = getenv(name);

  printf("getenv(\"%s\") = ", name);
  print_escaped(value != NULL ? value : "(null)");
  printf("\n");
}

/* report a fatal error that happens before any CGI header has been written:
 * emit a plain-text header, then the message, then exit with status 1 */
static void fatal_before_header(const char *message, const char *detail)
{
  printf("Content-type: text/plain\n\n");
  printf("ERROR! --- %s %s\n", message, detail);
  exit(1);
}

/* return 1 if any '/'-separated component of path is exactly "..", else 0 */
static int has_parent_component(const char *path)
{
  const char *p = path;

  while (*p != '\0') {
    const char *slash = strchr(p, '/');
    size_t len = (slash != NULL) ? (size_t)(slash - p) : strlen(p);

    if (len == 2 && p[0] == '.' && p[1] == '.')
      return(1);
    if (slash == NULL)
      break;
    p = slash + 1;
  }
  return(0);
}

/* copy the value of a "X=value" parameter into dest (capacity destsize).
 * Returns the position after the value, or NULL if '=' is missing or the
 * value does not fit; dest is NUL-terminated either way. */
static char *copy_string_param(char *qsp, char *dest, size_t destsize)
{
  size_t used = 0;

  if (*++qsp != '=')
    return(NULL);
  ++qsp;
  while (*qsp != '&' && *qsp != '\0') {
    if (used + 1 >= destsize) {
      dest[used] = '\0';
      return(NULL);
    }
    dest[used++] = *qsp++;
  }
  dest[used] = '\0';
  if (*qsp == '&')
    ++qsp;
  return(qsp);
}

/* parse the decimal value of a "X=digits" parameter into *out.  With
 * allow_sign set, an optional '-' followed by an optional '+' may precede the
 * digits.  A value with no digits parses as 0.  Returns the position after
 * the value, or NULL if '=' is missing or the number would overflow a long
 * (in which case *out is left untouched). */
static char *parse_long_param(char *qsp, long *out, int allow_sign)
{
  long value = 0;
  int negative = 0;

  if (*++qsp != '=')
    return(NULL);
  ++qsp;
  if (allow_sign) {
    if (*qsp == '-') {
      negative = 1;
      ++qsp;
    }
    if (*qsp == '+')
      ++qsp;
  }
  while (*qsp >= '0' && *qsp <= '9') {
    int digit = *qsp++ - '0';

    if (value > (LONG_MAX - digit) / 10)
      return(NULL);
    value = 10 * value + digit;
  }
  *out = negative ? -value : value;
  if (*qsp == '&')
    ++qsp;
  return(qsp);
}