Source Code & Software Patents: A Guide to Software & Internet Patent Litigation for Attorneys & Experts
by Andrew Schulman (http://www.SoftwareLitigationConsulting.com)
Code example for Chapter 1: What’s so special (or not so special) about source code?
Code example: mkndx & find in C
mkndx creates an index of lines of source code; find searches in indices created by mkndx
[TODO: show how to use, what output looks like, what index files look like]
[TODO: use strhash.c below to explain data structures, e.g. linked list, table, etc.; note that VBS and awk code merely assume the presence of associated arrays; strhash.c implements them, and strhash.h provides an interface]
[TODO: link to executable files for Windows, Mac OSX; show portion of disassembly; show portion of hexdump]
========
// ndx.c // TODO: // change strhash so val is (void *) to be determined by user // error if can't open file? // output current count to stderr // show ls equiv to "dir /s/b /a-d" #include <stdlib.h> #include <stdio.h> #include "strhash.h" void fail(char *s) { puts(s); exit(1); } void *alloc(int size) {        void *p = calloc(size, 1); // 0-init        if (! p) fail("insufficient memory");        return p; } HASHTAB files = (HASHTAB) 0; int get_files(char *s) {        static int cnt = 0; // only init once!        int this_cnt = 0; // init each time        char *cmd, *buf;        FILE *f;               if (! files) // one-time init                if (! (files = MakeHashTab()))                        fail("Can't create files list");        if (*s == '@')        {            f = fopen(s+1, "r");            if (! f) fail("can't open @file");        }        else        {            cmd = alloc(1024);            cmd[0] = '\0';            strcpy(cmd, "dir /s/b /a-d ");            strcat(cmd, s);            strcat(cmd, " > __tmp.tmp"); // TODO: make temp file            //fprintf(stderr, "get_files: %s\n", cmd);            system(cmd);            free(cmd);            f = fopen("__tmp.tmp", "r");            if (! f) fail("can't open __tmp.tmp");        }        buf = alloc(2048);        while (fgets(buf, 2047, f))        {       if ((! *buf) || (*buf == '#')) // comments in @file          continue;       buf[strlen(buf)-1] = '\0'; // remove \n       cnt++;       this_cnt++;       //fprintf(stderr, "get_files: <%s> [%u]\n", buf, cnt);       SetHashTab(files, buf, cnt); // cnt = fnum        }        fclose(f);        free(buf);        return this_cnt; } int print_func(char *s, int count, int val, unsigned *p) {       if (count == 1)       printf("%u\t%s\t[%u,]\n", count, s, val);       else       {       int i, top;       printf("%u\t%s\t[", count, s);       top = min(50, count);       for (i=0; i<top; i++)       {       printf("%u,", p[i]);       }       printf("]\n");        }        return 1; } HASHTAB strings = (HASHTAB) 0; static int gcnt = 0; int do_file(char *fname, int count, int fnum, unsigned *p) {    static int fcnt = 0;    int cnt = 0;    FILE *f;    char *buf;    fcnt++;    fprintf(stderr, "%u (%u%%)\t%s\t%u\n", fcnt, (100*fcnt) / (1+gcnt), fname, fnum);    printf("%u\t%s\n", fnum, fname);    f = fopen(fname, "r");    // if (! f) fail("can't open file"); // no, keep going    if (! f) { fprintf(stderr, "\nCan't open <%s>\n\n", fname); return 0; }    buf = alloc(10240);    // TODO: need to create temp hashtab, so can do 1 entry per file    // TODO: maybe change MakeHashTab to take param with tab_size, store    while (fgets(buf, 10239, f))    {       char *s = buf;       buf[strlen(buf)-1] = '\0'; // remove \n       while (*s == ' ' || *s == '\t') s++; // remove leading spaces       if (*s)       {          cnt++;          SetHashTab(strings, s, fnum); // will keep count       }    }    fclose(f);    free(buf);    return 1; } main(int argc, char *argv[]) {    unsigned i;    if (argc < 2)       fail("usage: ndx [file*.ext or @file]");    for (i=1; i<argc; i++)       gcnt += get_files(argv[i]);    printf("%u files\n", gcnt);    // ForXInHashtab(files, print_func);    puts("Files\n");    strings = MakeHashTab();    ForXInHashtab(files, do_file);    fputs("Generating output...\n", stderr);    puts("\nStrings\n");    ForXInHashtab(strings, print_func);    FreeHashTab(files);    FreeHashTab(strings); } ========== strhash.h: // strhash.h typedef void *HASHTAB ; HASHTAB MakeHashTab(void); void FreeHashTab(HASHTAB hashtab); void SetHashTab(HASHTAB hashtab, char *s, unsigned val); int GetHashTab(HASHTAB hashtab, char *s); typedef int (*FORIN_FUNC)(char *s, int count, unsigned val, unsigned *p); void ForXInHashtab(HASHTAB hashtab, FORIN_FUNC func); ========== // strhash.c // TODO: replace val with count and union of val and void* // use val when count == 1; in one test (Firefox source), 80% of // entries appeared only 1x #include <stdlib.h> #include <stdio.h> #include "strhash.h" typedef struct _node {    struct _node *next;    char *s;    unsigned val;    } NODE; void _fail(const char *s) { puts(s); exit(1); } void *my_alloc(int x, int y) {    void *p = calloc(x,y); // zero init    if (! p)        _fail("Error: insufficient memory");    return p; } void insert(NODE **list, char *s, unsigned val) {    NODE *node;    NODE *l2 = *list;    while (l2)    {        if (strcmp(l2->s, s) == 0) // could do case-insensitive        {                l2->val = val; // took 1 hr. to find bug where omitted this                return; // already in list        }        else            l2 = l2->next;    }    node = (NODE *) my_alloc(1, sizeof(NODE));    node->next = *list;    *list = node;    node->s = my_alloc(1, strlen(s)+1);    strcpy(node->s, s);    node->val = val; } int find(NODE *list, char *s) {    while (list)    {       if (*s == *(list->s))       {       static int num_strcmp = 0;       num_strcmp++;       if ((num_strcmp % 1000000) == 0)          fprintf(stderr, "\n\n!! %u calls to strcmp\n\n\n", num_strcmp);       if (strcmp(list->s, s) == 0)          return list->val;       else          list = list->next;       }       else       list = list->next;    }    return 0; // valid val begins with 1 } // prime numbers: 9973 11113 13997 16943 18917 // TODO: do TAB_SIZE and hash make sense together? #define TAB_SIZE 11113 // see http://www.cs.yorku.ca/~oz/hash.html unsigned long HASH(unsigned char *s) {    unsigned long hash = 5381;    int c;    while (c = *s++) {            hash = ((hash << 5) + hash) + c;    }         return (hash % TAB_SIZE); } NODE **mk_hashtab(void) {    return (NODE **) my_alloc(TAB_SIZE, sizeof(NODE *)); } HASHTAB MakeHashTab(void) {    return (HASHTAB) mk_hashtab(); } void FreeHashTab(HASHTAB hashtab) {    free(hashtab); } void set(NODE **hashtab, char *s, unsigned val) {    insert(&hashtab[HASH(s)], s, val); } void SetHashTab(HASHTAB hashtab, char *s, unsigned val) {    set((NODE**) hashtab, s, val); } int get(NODE **hashtab, char *s) {    return find(hashtab[HASH(s)], s); } int GetHashTab(HASHTAB hashtab, char *s) {    return get((NODE**)hashtab, s); } typedef int (*FORIN_FUNC)(char *s, unsigned val); void for_in(NODE **hashtab, FORIN_FUNC func) {    NODE *list;    int i;   for (i=0; i<TAB_SIZE; i++)   {    if (list = hashtab[i])    {       while (list)       {       if (! (*func)(list->s, list->count, list->val, list->p))          return; // done if func returns 0       list = list->next;       }    }   } } void ForXInHashtab(HASHTAB hashtab, FORIN_FUNC func) {    for_in((NODE**) hashtab, func); } #ifdef TESTING int print_func(char *s, unsigned val) {    printf("%u\t%s\n", val, s);    return 1; } main(int argc, char *argv[]) {    NODE **hashtab;    char buf[10240];    int i, cnt=0;       hashtab = mk_hashtab();    while (gets(buf)) // only get up to buf size    {        char *s = buf;        while (*s == ' ' || *s == '\t') s++; // remove leading spaces        if (*s)            set(hashtab, s, ++cnt);    }    for_in(hashtab, print_func); } #endif ======= find.c: // find.c #include <stdlib.h> #include <stdio.h> #include <string.h> #include <ctype.h> #include "strhash.h" void fail(char *s) { puts(s); exit(1); } int case_sensitive = 0; char *strlower(char *s) // not in place; make copy {        char *s2 = malloc(strlen(s)+1);        char *p;        if (! s2) fail("insufficient memory");        s2[0] = '\0';        strcpy(s2, s); // unsafe        for (p=s2; *p; p++) {                if ((*p >= 'A') && (*p <= 'Z'))                        *p = *p += 0x20; // 'a' - 'A' }        return s2; // caller responsibility to free } char *match(char *str1, char *str2) {    if (case_sensitive)       return strstr(str1, str2);    else    {       static char *s2 = (char *) 0;       char *s1 = strlower(str1);       char *ret;       if (! s2) // one time, not going to change          s2 = strlower(str2);       ret = strstr(s1, s2);       free(s1);       // free(s2);       return ret;    } } main(int argc, char *argv[]) {    HASHTAB hashtab;    FILE *f;    char *pat, *ndxfile, *buf, *s;    typedef char *STRING;    STRING arr[3];    int i, cnt;    int in_files = 0, in_strings = 0;    if (argc < 3)       fail("usage: find [pat] [ndxfile]");    pat = argv[1];    printf("pat: <%s>\n", pat);    ndxfile = argv[2];    if (! (f = fopen(ndxfile, "r"))) fail("can't open ndxfile");    if (! (hashtab = MakeHashTab())) fail("can't allocate hashtab");    if (! (buf = calloc(10240, 1))) fail("insufficient memory");    while (fgets(buf, 10239, f))    {       s = buf;       if ((! *s) || (*s == '#') || (*s == '\n')) // comments or blank lines          continue;       s[strlen(s)-1] = '\0'; // remove \n       while (*s == ' ' || *s == '\t') s++; // remove leading spaces       if (! *s)          continue;       if (arr[0] = strtok(s, "\t")) cnt = 1;       if (arr[1] = strtok(NULL, "\t")) cnt = 2;       if (arr[2] = strtok(NULL, "\t")) cnt = 3;      if (cnt == 1)      {       if (strcmp(arr[0], "Files") == 0) // doh, remember match ret 0       {          in_files = 1;       }       else if (strcmp(arr[0], "Strings") == 0)       {          in_files = 0;          in_strings = 1;          fputs("Done with files\n", stderr);       }      }      if (in_files && (cnt == 2)) // TODO: error msg if cnt != 2      {       // TODO: need ability to do val -> str,       // following is hack       // could also use first line with "%u files" to alloc array       char *s = malloc(strlen(arr[1])+1);       if (! s) fail("insufficient memory");       s[0] = '\0';       strcpy(s, arr[1]);       SetHashTab(hashtab, arr[0], (unsigned) s); // not atoi       //SetHashTab(hashtab, arr[1], atoi(arr[0])); //TODO: need ability to do val->str      }      if (in_strings && (cnt == 3) && match(arr[1], pat)) //TODO: err msg if cnt!=3      {       int fcount = atoi(arr[0]);       char *str = arr[1];       char *fnums = arr[2];       int len = strlen(fnums);       int fnum;       if (*fnums == '[') fnums++;       if (fnums[len-2] == ']') fnums[len-2] = '\0' ;       if ((fcount == 1) && (fnums[len-3] == ','))       {          fnums[len-3] = '\0';          printf("%s\n\t%s\n", str, GetHashTab(hashtab, fnums));       }       else if (fcount > 1)       {          char *fnumstr;          printf("%s\n", str);          if (fcount >= 50)             printf("COMMON STRING: in %u files, including:\n", fcount);          fnumstr = strtok(fnums, ",");          do {             printf("\t%s\n", GetHashTab(hashtab, fnumstr));          } while (fnumstr = strtok(NULL, ","));       }      }    }    fclose(f);    free(buf); }