[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: gdiff Re: CVS: cvs.openbsd.org: src
On Mon, 7 Jun 2004 05:35:48 -0400 (EDT)
Ted Unangst <tedu@zeitbombe.org> wrote:
> > is there any reason why we still depend on gdiff?
> > what is wrong with diff(1)?
>
> still lacks --ignore-matching-lines=regex functionality. and the
> different algo means every patch will change, but that's one time pain.
Perhaps a new port Makefile variable can be added, something that
denotes that the port can rely on the diff in the tree instead of
gdiff?
Or is diff -I (ignore matching lines) support imminent? Here's a patch
against diff to add basic support. The method for doing so is not
very efficient, but it mostly only affects diff when using -I arguments.
The alternative approaches I tried I couldn't get to work (such as
reserving a bit in the hash value or adding another member to the line
structure, which wouldn't be very different from the approach I ended up
choosing anyway).
With this patch, a lot of examples I threw at diff with -I patterns
match the output of gdiff, but I didn't try it on my own ports tree
yet. That would be the ultimate test, right? :)
There are other areas for improvement (such as concatenating all the -I
patterns together with `\|' spliced in between, so only one regcomp()
needs to be called), but since none I've discovered affects the
bottleneck of the program, I chose to ignore them for now.
Suggestions welcome.
Index: diff.1
===================================================================
RCS file: /cvs/src/usr.bin/diff/diff.1,v
retrieving revision 1.25
diff -u -r1.25 diff.1
--- diff.1 16 Mar 2004 00:40:34 -0000 1.25
+++ diff.1 12 Jun 2004 13:00:17 -0000
@@ -38,31 +38,36 @@
.Sh SYNOPSIS
.Nm diff
.Op Fl abdilpqtTw
+.Op Fl I Ar pattern
.Oo
-.Fl c | Fl e | Fl f |
-.Fl n | Fl u
+.Fl c | e | f |
+.Fl n | u
.Oc
.Op Fl L Ar label
.Ar file1 file2
.Nm diff
.Op Fl abdilpqtTw
+.Op Fl I Ar pattern
.Op Fl L Ar label
.Fl C Ar number
.Ar file1 file2
.Nm diff
.Op Fl abdilqtw
+.Op Fl I Ar pattern
.Fl D Ar string
.Ar file1 file2
.Nm diff
.Op Fl abdilpqtTw
+.Op Fl I Ar pattern
.Op Fl L Ar label
.Fl U Ar number
.Ar file1 file2
.Nm diff
.Op Fl abdilNPpqtTw
+.Op Fl I Ar pattern
.Oo
-.Fl c | Fl e | Fl f |
-.Fl n | Fl u
+.Fl c | e | f |
+.Fl n | u
.Oc
.Bk -words
.Op Fl L Ar label
@@ -97,7 +102,7 @@
The lines removed from
.Ar file1
are marked with
-.Sq \-\ \& ;
+.Sq \&-\ \& ;
those added to
.Ar file2
are marked
@@ -179,7 +184,8 @@
Comparison options:
.Bl -tag -width Ds
.It Fl a
-Treat all files as ASCII.
+Treat all files as
+.Tn ASCII .
.It Fl b
Causes trailing blanks (spaces and tabs) to be ignored, and other
strings of blanks to compare equal.
@@ -187,6 +193,16 @@
Try very hard to produce a diff as small as possible.
This may consume a lot of processing power and memory when processing
large files with many changes.
+.It Fl I Ar pattern
+Ignores changes whose lines match
+.Ar pattern .
+All lines in the change must match for the change to be ignored.
+Multiple
+.Fl I
+patterns may be specified.
+See
+.Xr re_format 7
+for more information on the syntax of regular expression patterns.
.It Fl i
Ignores the case of letters.
E.g.,
@@ -285,8 +301,9 @@
common subdirectories, and files which appear in only one directory
are described as such.
In directory mode only regular files and directories are compared.
-If a non-regular file such as a device special file or FIFO is
-encountered, a diagnostic message is printed.
+If a non-regular file such as a device special file or
+.Tn FIFO
+is encountered, a diagnostic message is printed.
.Pp
If only one of
.Ar file1
@@ -372,11 +389,11 @@
.Va ZZ .
.It Li XX,YY Ns Ic c Ns Li ZZ,QQ
Replace the range
-.Va XX , Ns YY
+.Va XX , Ns Va YY
from
.Ar file1
with the range
-.Va ZZ , Ns QQ
+.Va ZZ , Ns Va QQ
from
.Ar file2 .
.El
@@ -418,7 +435,7 @@
.El
.Sh FILES
.Bl -tag -width /tmp/diff.XXXXXXXX -compact
-.It Pa /tmp/diff.XXXXXXXX
+.It Pa /tmp/diff. Ns Ar XXXXXXXX
Temporary file used when comparing a device or the standard input.
Note that the temporary file is unlinked as soon as it is created
so it will not show up in a directory listing.
@@ -442,7 +459,8 @@
.Xr diff3 1 ,
.Xr ed 1 ,
.Xr pr 1 ,
-.Xr fnmatch 3
+.Xr fnmatch 3 ,
+.Xr re_format 7
.Sh STANDARDS
The
.Nm
Index: diff.c
===================================================================
RCS file: /cvs/src/usr.bin/diff/diff.c,v
retrieving revision 1.45
diff -u -r1.45 diff.c
--- diff.c 16 Mar 2004 00:40:34 -0000 1.45
+++ diff.c 12 Jun 2004 13:00:18 -0000
@@ -46,8 +46,9 @@
char *start, *ifdefname, *diffargs, *label;
struct stat stb1, stb2;
struct excludes *excludes_list;
+struct ignore_res *ignore_res_list;
-#define OPTIONS "0123456789abC:cdD:efhiL:lnNPpqrS:sTtU:uwX:x:"
+#define OPTIONS "0123456789abC:cdD:efhI:iL:lnNPpqrS:sTtU:uwX:x:"
static struct option longopts[] = {
{ "text", no_argument, 0, 'a' },
{ "ignore-space-change", no_argument, 0, 'b' },
@@ -56,6 +57,7 @@
{ "minimal", no_argument, 0, 'd' },
{ "ed", no_argument, 0, 'e' },
{ "forward-ed", no_argument, 0, 'f' },
+ { "ignore-matching-lines", required_argument, 0, 'I' },
{ "ignore-case", no_argument, 0, 'i' },
{ "paginate", no_argument, 0, 'l' },
{ "label", required_argument, 0, 'L' },
@@ -78,6 +80,7 @@
__dead void usage(void);
void push_excludes(char *);
+void push_ignore_res(char *);
void read_excludes_file(char *file);
void set_argstr(char **, char **);
@@ -139,6 +142,9 @@
case 'h':
/* silently ignore for backwards compatibility */
break;
+ case 'I':
+ push_ignore_res(optarg);
+ break;
case 'i':
iflag = 1;
break;
@@ -340,6 +346,24 @@
entry->pattern = pattern;
entry->next = excludes_list;
excludes_list = entry;
+}
+
+void
+push_ignore_res(char *pattern)
+{
+ struct ignore_res *entry;
+ regex_t re;
+ char buf[BUFSIZ];
+ int error;
+
+ if ((error = regcomp(&re, pattern, 0)) != 0) {
+ regerror(error, &re, buf, sizeof(buf));
+ err(2, "%s: %s", pattern, buf);
+ }
+ entry = emalloc(sizeof(*entry));
+ entry->re = re;
+ entry->next = ignore_res_list;
+ ignore_res_list = entry;
}
void
Index: diff.h
===================================================================
RCS file: /cvs/src/usr.bin/diff/diff.h,v
retrieving revision 1.27
diff -u -r1.27 diff.h
--- diff.h 16 Mar 2004 00:40:34 -0000 1.27
+++ diff.h 12 Jun 2004 13:00:18 -0000
@@ -31,6 +31,9 @@
* @(#)diff.h 8.1 (Berkeley) 6/6/93
*/
+#include <sys/types.h>
+#include <regex.h>
+
/*
* Output format options
*/
@@ -70,12 +73,18 @@
struct excludes *next;
};
+struct ignore_res {
+ regex_t re;
+ struct ignore_res *next;
+};
+
extern int aflag, bflag, dflag, iflag, lflag, Nflag, Pflag, pflag, rflag,
sflag, tflag, Tflag, wflag;
extern int format, context, status;
extern char *start, *ifdefname, *diffargs, *label;
extern struct stat stb1, stb2;
extern struct excludes *excludes_list;
+extern struct ignore_res *ignore_res_list;
char *splice(char *, char *);
int diffreg(char *, char *, int);
Index: diffreg.c
===================================================================
RCS file: /cvs/src/usr.bin/diff/diffreg.c,v
retrieving revision 1.55
diff -u -r1.55 diffreg.c
--- diffreg.c 7 Jan 2004 17:18:32 -0000 1.55
+++ diffreg.c 12 Jun 2004 13:00:19 -0000
@@ -163,6 +163,11 @@
int value;
} *file[2];
+struct lbuf {
+ char *line;
+ int pos, max;
+};
+
/*
* The following struct is used to record change information when
* doing a "context" or "unified" diff. (see routine "change" to
@@ -185,6 +190,7 @@
static int pref, suff; /* length of prefix and suffix */
static int slen[2];
static int anychange;
+static char *ignore[2];
static long *ixnew; /* will be overlaid on file[1] */
static long *ixold; /* will be overlaid on klist */
static struct cand *clist; /* merely a free storage pot for candidates */
@@ -221,11 +227,13 @@
static int skipline(FILE *);
static int isqrt(int);
static int stone(int *, int, int *, int *);
-static int readhash(FILE *);
+static int readhash(FILE *, int *);
static int files_differ(FILE *, FILE *, int);
static __inline int min(int, int);
static __inline int max(int, int);
static char *match_function(const long *, int, FILE *);
+static void pushlbuf(struct lbuf *, int, int);
+static __inline int ignoreline(char *);
/*
@@ -547,7 +555,8 @@
prepare(int i, FILE *fd, off_t filesize)
{
struct line *p;
- int j, h;
+ char *ign = NULL;
+ int ignline, j, h;
size_t sz;
rewind(fd);
@@ -557,15 +566,22 @@
sz = 100;
p = emalloc((sz + 3) * sizeof(struct line));
- for (j = 0; (h = readhash(fd));) {
+ if (ignore_res_list)
+ ign = emalloc((sz + 3) * sizeof(*ign));
+ for (j = 0; (h = readhash(fd, &ignline));) {
if (j == sz) {
sz = sz * 3 / 2;
p = erealloc(p, (sz + 3) * sizeof(struct line));
+ if (ignore_res_list)
+ ign = erealloc(ign, (sz + 3) * sizeof(*ign));
}
p[++j].value = h;
+ if (ignore_res_list)
+ ign[j] = ignline;
}
len[i] = j;
file[i] = p;
+ ignore[i] = ign;
}
static void
@@ -981,6 +997,54 @@
restart:
if (format != D_IFDEF && a > b && c > d)
return;
+ if (ignore_res_list) {
+ int proceed;
+
+ if (b - a == d - c) {
+ /*
+ * Change. Try to follow GNU diff's
+ * odd behavior.
+ */
+ proceed = 0;
+ for (i = 0; i <= b - a; i++)
+ if (ignore[0][a + i] == 0 ||
+ ignore[1][c + i] == 0) {
+ /*
+ * Rats, a line wasn't matched.
+ * That means we can't ignore it.
+ */
+ proceed = 1;
+ break;
+ }
+ if (proceed == 0)
+ return;
+ } else if (a == b) {
+ /*
+ * Delete. Just like change; all lines must
+ * contain the pattern to ignore the delete.
+ */
+ proceed = 0;
+ for (i = d; i <= c; i++)
+ if (ignore[1][i] == 0) {
+ proceed = 1;
+ break;
+ }
+ if (proceed == 0)
+ return;
+ } else if (c == d) {
+ /*
+ * Insert. Same as delete.
+ */
+ proceed = 0;
+ for (i = b; i <= a; i++)
+ if (ignore[0][i] == 0) {
+ proceed = 1;
+ break;
+ }
+ if (proceed == 0)
+ return;
+ }
+ }
if (format == D_CONTEXT || format == D_UNIFIED) {
/*
* Allocate change records as needed.
@@ -1164,11 +1228,17 @@
* Hash function taken from Robert Sedgewick, Algorithms in C, 3d ed., p 578.
*/
static int
-readhash(FILE *f)
+readhash(FILE *f, int *ignore)
{
int i, t, space;
int sum;
+ struct lbuf lbuf;
+ if (ignore_res_list) {
+ lbuf.line = NULL;
+ lbuf.max = lbuf.pos = -1;
+ }
+
sum = 1;
space = 0;
if (!bflag && !wflag) {
@@ -1179,6 +1249,8 @@
return (0);
break;
}
+ if (ignore_res_list)
+ pushlbuf(&lbuf, 80, t);
sum = sum * 127 + chrtran[t];
}
else
@@ -1188,6 +1260,8 @@
return (0);
break;
}
+ if (ignore_res_list)
+ pushlbuf(&lbuf, 80, t);
sum = sum * 127 + t;
}
} else {
@@ -1202,6 +1276,8 @@
i++;
space = 0;
}
+ if (ignore_res_list)
+ pushlbuf(&lbuf, 80, t);
sum = sum * 127 + chrtran[t];
i++;
continue;
@@ -1215,11 +1291,37 @@
break;
}
}
+ if (ignore_res_list) {
+ pushlbuf(&lbuf, 1, '\0');
+ *ignore = ignoreline(lbuf.line);
+ free(lbuf.line);
+ }
/*
* There is a remote possibility that we end up with a zero sum.
* Zero is used as an EOF marker, so return 1 instead.
*/
return (sum == 0 ? 1 : sum);
+}
+
+static void
+pushlbuf(struct lbuf *lb, int inc, int c)
+{
+ if (++lb->pos >= lb->max) {
+ lb->max = max(lb->max, lb->pos) + inc;
+ lb->line = erealloc(lb->line, lb->max * sizeof(char));
+ }
+ lb->line[lb->pos] = c;
+}
+
+static int __inline
+ignoreline(char *line)
+{
+ struct ignore_res *rp;
+
+ for (rp = ignore_res_list; rp != NULL; rp = rp->next)
+ if (regexec(&rp->re, line, 0, NULL, 0) == 0)
+ return 1;
+ return 0;
}
static int