Source Code & Software Patents: A Guide to Software & Internet Patent Litigation for Attorneys & Experts
by Andrew Schulman (http://www.SoftwareLitigationConsulting.com)
Code example for Chapter 1: What’s so special (or not so special) about source code?
Code example: mkndx & find in C
mkndx creates an index of lines of source code; find searches in indices created by mkndx
[TODO: show how to use, what output looks like, what index files look like]
[TODO: use strhash.c below to explain data structures, e.g. linked list, table, etc.; note that VBS and awk code merely assume the presence of associated arrays; strhash.c implements them, and strhash.h provides an interface]
[TODO: link to executable files for Windows, Mac OSX; show portion of disassembly; show portion of hexdump]
========
// ndx.c
// TODO:
// change strhash so val is (void *) to be determined by user
// error if can't open file?
// output current count to stderr
// show ls equiv to "dir /s/b /a-d"
#include <stdlib.h>
#include <stdio.h>
#include "strhash.h"
void fail(char *s) { puts(s); exit(1); }
void *alloc(int size)
{
void *p = calloc(size, 1); // 0-init
if (! p) fail("insufficient memory");
return p;
}
HASHTAB files = (HASHTAB) 0;
int get_files(char *s)
{
static int cnt = 0; // only init once!
int this_cnt = 0; // init each time
char *cmd, *buf;
FILE *f;
if (! files) // one-time init
if (! (files = MakeHashTab()))
fail("Can't create files list");
if (*s == '@')
{
f = fopen(s+1, "r");
if (! f) fail("can't open @file");
}
else
{
cmd = alloc(1024);
cmd[0] = '\0';
strcpy(cmd, "dir /s/b /a-d ");
strcat(cmd, s);
strcat(cmd, " > __tmp.tmp"); // TODO: make temp file
//fprintf(stderr, "get_files: %s\n", cmd);
system(cmd);
free(cmd);
f = fopen("__tmp.tmp", "r");
if (! f) fail("can't open __tmp.tmp");
}
buf = alloc(2048);
while (fgets(buf, 2047, f))
{
if ((! *buf) || (*buf == '#')) // comments in @file
continue;
buf[strlen(buf)-1] = '\0'; // remove \n
cnt++;
this_cnt++;
//fprintf(stderr, "get_files: <%s> [%u]\n", buf, cnt);
SetHashTab(files, buf, cnt); // cnt = fnum
}
fclose(f);
free(buf);
return this_cnt;
}
int print_func(char *s, int count, int val, unsigned *p)
{
if (count == 1)
printf("%u\t%s\t[%u,]\n", count, s, val);
else
{
int i, top;
printf("%u\t%s\t[", count, s);
top = min(50, count);
for (i=0; i<top; i++)
{
printf("%u,", p[i]);
}
printf("]\n");
}
return 1;
}
HASHTAB strings = (HASHTAB) 0;
static int gcnt = 0;
int do_file(char *fname, int count, int fnum, unsigned *p)
{
static int fcnt = 0;
int cnt = 0;
FILE *f;
char *buf;
fcnt++;
fprintf(stderr, "%u (%u%%)\t%s\t%u\n", fcnt,
(100*fcnt) / (1+gcnt), fname, fnum);
printf("%u\t%s\n", fnum, fname);
f = fopen(fname, "r");
// if (! f) fail("can't open file"); // no, keep going
if (! f) { fprintf(stderr, "\nCan't open <%s>\n\n", fname); return 0; }
buf = alloc(10240);
// TODO: need to create temp hashtab, so can do 1 entry per file
// TODO: maybe change MakeHashTab to take param with tab_size, store
while (fgets(buf, 10239, f))
{
char *s = buf;
buf[strlen(buf)-1] = '\0'; // remove \n
while (*s == ' ' || *s == '\t') s++; // remove leading spaces
if (*s)
{
cnt++;
SetHashTab(strings, s, fnum); // will keep count
}
}
fclose(f);
free(buf);
return 1;
}
main(int argc, char *argv[])
{
unsigned i;
if (argc < 2)
fail("usage: ndx [file*.ext or @file]");
for (i=1; i<argc; i++)
gcnt += get_files(argv[i]);
printf("%u files\n", gcnt);
// ForXInHashtab(files, print_func);
puts("Files\n");
strings = MakeHashTab();
ForXInHashtab(files, do_file);
fputs("Generating output...\n", stderr);
puts("\nStrings\n");
ForXInHashtab(strings, print_func);
FreeHashTab(files);
FreeHashTab(strings);
}
==========
strhash.h:
// strhash.h
typedef void *HASHTAB ;
HASHTAB MakeHashTab(void);
void FreeHashTab(HASHTAB hashtab);
void SetHashTab(HASHTAB hashtab, char *s, unsigned val);
int GetHashTab(HASHTAB hashtab, char *s);
typedef int (*FORIN_FUNC)(char *s, int count, unsigned val, unsigned *p);
void ForXInHashtab(HASHTAB hashtab, FORIN_FUNC func);
==========
// strhash.c
// TODO: replace val with count and union of val and void*
// use val when count == 1; in one test (Firefox source), 80% of
// entries appeared only 1x
#include <stdlib.h>
#include <stdio.h>
#include "strhash.h"
typedef struct _node {
struct _node *next;
char *s;
unsigned val;
} NODE;
void _fail(const char *s) { puts(s); exit(1); }
void *my_alloc(int x, int y)
{
void *p = calloc(x,y); // zero init
if (! p)
_fail("Error: insufficient memory");
return p;
}
void insert(NODE **list, char *s, unsigned val)
{
NODE *node;
NODE *l2 = *list;
while (l2)
{
if (strcmp(l2->s, s) == 0) // could do case-insensitive
{
l2->val = val; // took 1 hr. to find bug where omitted this
return; // already in list
}
else
l2 = l2->next;
}
node = (NODE *) my_alloc(1, sizeof(NODE));
node->next = *list;
*list = node;
node->s = my_alloc(1, strlen(s)+1);
strcpy(node->s, s);
node->val = val;
}
int find(NODE *list, char *s)
{
while (list)
{
if (*s == *(list->s))
{
static int num_strcmp = 0;
num_strcmp++;
if ((num_strcmp % 1000000) == 0)
fprintf(stderr, "\n\n!! %u calls to strcmp\n\n\n", num_strcmp);
if (strcmp(list->s, s) == 0)
return list->val;
else
list = list->next;
}
else
list = list->next;
}
return 0; // valid val begins with 1
}
// prime numbers: 9973 11113 13997 16943 18917
// TODO: do TAB_SIZE and hash make sense together?
#define TAB_SIZE 11113
// see http://www.cs.yorku.ca/~oz/hash.html
unsigned long HASH(unsigned char *s)
{
unsigned long hash = 5381;
int c;
while (c = *s++) {
hash = ((hash << 5) + hash) + c;
}
return (hash % TAB_SIZE);
}
NODE **mk_hashtab(void) {
return (NODE **) my_alloc(TAB_SIZE, sizeof(NODE *));
}
HASHTAB MakeHashTab(void) {
return (HASHTAB) mk_hashtab();
}
void FreeHashTab(HASHTAB hashtab) {
free(hashtab);
}
void set(NODE **hashtab, char *s, unsigned val) {
insert(&hashtab[HASH(s)], s, val);
}
void SetHashTab(HASHTAB hashtab, char *s, unsigned val) {
set((NODE**) hashtab, s, val);
}
int get(NODE **hashtab, char *s) {
return find(hashtab[HASH(s)], s);
}
int GetHashTab(HASHTAB hashtab, char *s) {
return get((NODE**)hashtab, s);
}
typedef int (*FORIN_FUNC)(char *s, unsigned val);
void for_in(NODE **hashtab, FORIN_FUNC func)
{
NODE *list;
int i;
for (i=0; i<TAB_SIZE; i++)
{
if (list = hashtab[i])
{
while (list)
{
if (! (*func)(list->s, list->count, list->val, list->p))
return; // done if func returns 0
list = list->next;
}
}
}
}
void ForXInHashtab(HASHTAB hashtab, FORIN_FUNC func)
{
for_in((NODE**) hashtab, func);
}
#ifdef TESTING
int print_func(char *s, unsigned val)
{
printf("%u\t%s\n", val, s);
return 1;
}
main(int argc, char *argv[])
{
NODE **hashtab;
char buf[10240];
int i, cnt=0;
hashtab = mk_hashtab();
while (gets(buf)) // only get up to buf size
{
char *s = buf;
while (*s == ' ' || *s == '\t') s++; // remove leading spaces
if (*s)
set(hashtab, s, ++cnt);
}
for_in(hashtab, print_func);
}
#endif
=======
find.c:
// find.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include "strhash.h"
void fail(char *s) { puts(s); exit(1); }
int case_sensitive = 0;
char *strlower(char *s) // not in place; make copy
{
char *s2 = malloc(strlen(s)+1);
char *p;
if (! s2) fail("insufficient memory");
s2[0] = '\0';
strcpy(s2, s); // unsafe
for (p=s2; *p; p++) {
if ((*p >= 'A') && (*p <= 'Z'))
*p = *p += 0x20; // 'a' - 'A'
}
return s2; // caller responsibility to free
}
char *match(char *str1, char *str2)
{
if (case_sensitive)
return strstr(str1, str2);
else
{
static char *s2 = (char *) 0;
char *s1 = strlower(str1);
char *ret;
if (! s2) // one time, not going to change
s2 = strlower(str2);
ret = strstr(s1, s2);
free(s1);
// free(s2);
return ret;
}
}
main(int argc, char *argv[])
{
HASHTAB hashtab;
FILE *f;
char *pat, *ndxfile, *buf, *s;
typedef char *STRING;
STRING arr[3];
int i, cnt;
int in_files = 0, in_strings = 0;
if (argc < 3)
fail("usage: find [pat] [ndxfile]");
pat = argv[1];
printf("pat: <%s>\n", pat);
ndxfile = argv[2];
if (! (f = fopen(ndxfile, "r"))) fail("can't open ndxfile");
if (! (hashtab = MakeHashTab())) fail("can't allocate hashtab");
if (! (buf = calloc(10240, 1))) fail("insufficient memory");
while (fgets(buf, 10239, f))
{
s = buf;
if ((! *s) || (*s == '#') || (*s == '\n')) // comments or blank lines
continue;
s[strlen(s)-1] = '\0'; // remove \n
while (*s == ' ' || *s == '\t') s++; // remove leading spaces
if (! *s)
continue;
if (arr[0] = strtok(s, "\t")) cnt = 1;
if (arr[1] = strtok(NULL, "\t")) cnt = 2;
if (arr[2] = strtok(NULL, "\t")) cnt = 3;
if (cnt == 1)
{
if (strcmp(arr[0], "Files") == 0) // doh, remember match ret 0
{
in_files = 1;
}
else if (strcmp(arr[0], "Strings") == 0)
{
in_files = 0;
in_strings = 1;
fputs("Done with files\n", stderr);
}
}
if (in_files && (cnt == 2)) // TODO: error msg if cnt != 2
{
// TODO: need ability to do val -> str,
// following is hack
// could also use first line with "%u files" to alloc array
char *s = malloc(strlen(arr[1])+1);
if (! s) fail("insufficient memory");
s[0] = '\0';
strcpy(s, arr[1]);
SetHashTab(hashtab, arr[0], (unsigned) s); // not atoi
//SetHashTab(hashtab, arr[1], atoi(arr[0])); //TODO: need ability to do val->str
}
if (in_strings && (cnt == 3) && match(arr[1], pat)) //TODO: err msg if cnt!=3
{
int fcount = atoi(arr[0]);
char *str = arr[1];
char *fnums = arr[2];
int len = strlen(fnums);
int fnum;
if (*fnums == '[') fnums++;
if (fnums[len-2] == ']') fnums[len-2] = '\0' ;
if ((fcount == 1) && (fnums[len-3] == ','))
{
fnums[len-3] = '\0';
printf("%s\n\t%s\n", str, GetHashTab(hashtab, fnums));
}
else if (fcount > 1)
{
char *fnumstr;
printf("%s\n", str);
if (fcount >= 50)
printf("COMMON STRING: in %u files, including:\n", fcount);
fnumstr = strtok(fnums, ",");
do {
printf("\t%s\n", GetHashTab(hashtab, fnumstr));
} while (fnumstr = strtok(NULL, ","));
}
}
}
fclose(f);
free(buf);
}