[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
lynx-dev an optimization for large pages (patch)
From: |
Leonid Pauzner |
Subject: |
lynx-dev an optimization for large pages (patch) |
Date: |
Sun, 13 Oct 2002 19:02:17 +0400 (MSD) |
Hi, everybody!
I now returns to lynx again. It renders some 800Kb documentation file too
slow. Profiling with this file shows a hot point in HTAnchor_findChild (11%)
and a patch attached below will decrease this number down to 0.1%
Now details:
-----------------------------------------------
1.20 0.74 2687/6107 HTAnchor_findAddress [14]
1.52 0.94 3420/6107 HTAnchor_findChildAndLink [7]
[9] 10.9 2.72 1.67 6107 HTAnchor_findChild [9]
1.66 0.00 4989243/4998014 HTIdentical [16]
0.01 0.00 3427/69137 HTSACopy [53]
0.00 0.00 3427/3427 HTChildAnchor_new [209]
0.00 0.00 3427/7204 HTList_addObject [201]
0.00 0.00 7/528 HTList_new [225]
-----------------------------------------------
It turns out the page has 3168 anchors, and HTAnchor_findChild() usage
shows a quadratic complexity on the number of anchors on the page.
Anchores are stored in a list which is traversed each time we call a
function. This is not a full story: anchors happens to be of two types:
"with a tag" and "without a tag", both are stored in the same list, but
the first force traversing list (=we add only unique tags to the list),
while the second added to the list blindly. Don't ask me why.
So I replace the list with a tree (anchors with a tag) and a list
(the rest, without tags). Search in a balanced tree is very effective.
I added a search method to HTBTree implementation, it wasn't there.
The resulting profile:
-----------------------------------------------
0.00 0.01 2687/6107 HTAnchor_findAddress [36]
0.00 0.01 3420/6107 HTAnchor_findChildAndLink [33]
[87] 0.1 0.00 0.01 6107 HTAnchor_findChild [87]
0.00 0.01 3427/3427 HTChildAnchor_new [95]
0.00 0.01 3168/6945 HTList_addObject [90]
0.00 0.00 259/65969 HTSACopy [43]
0.00 0.00 259/259 HTBTree_add [119]
0.00 0.00 7/7 HTBTree_new [212]
0.00 0.00 1/522 HTList_new [112]
0.00 0.00 2932/2932 HTBTree_search [326]
-----------------------------------------------
Now a patch against the latest dev.9 -
* optimization for parsing large html - with thousands of anchors -
remove quadratic complexity from HTAnchor_findChild() usage.
HTParentAnchor::children list was traversed zillion of times,
now replaced with a tree (anchors of one type, fast search)
and a list (just a storage for the rest, no search required).
This one gives 11% speedup for 3100 anchor html on my system. - LP
* add a search method to HTBTree implementation. - LP
diff -u -p -r LYNX2-8-.590/www/library/implemen/htanchor.h
LYNX2-8-/www/library/implemen/htanchor.h
--- LYNX2-8-.590/www/library/implemen/htanchor.h Thu Feb 8 18:50:00 2001
+++ LYNX2-8-/www/library/implemen/htanchor.h Sun Oct 13 17:25:32 2002
@@ -15,6 +15,7 @@
/* Version 1 of 24-Oct-1991 (JFG), written in C, browser-independent */
#include <HTList.h>
+#include <HTBTree.h>
#include <HTChunk.h>
#include <HTAtom.h>
#include <UCDefs.h>
@@ -52,7 +53,8 @@ struct _HTParentAnchor {
HTParentAnchor * parent; /* Parent of this anchor (self) */
/* ParentAnchor-specific information */
- HTList * children; /* Subanchors of this, if any */
+ HTList * children_notag; /* Subanchors of this, if any. (tag is NULL) */
+ HTBTree * children; /* Subanchors ... sorted by nonNULL tag */
HTList * sources; /* List of anchors pointing to this, if any */
HyperDoc * document; /* The document within which this is an anchor
*/
char * address; /* Absolute address of this node */
@@ -194,15 +196,6 @@ extern BOOL HTAnchor_delete PARAMS((
extern void HTAnchor_clearSourceCache PARAMS((
HTParentAnchor * me));
#endif
-
-/* Move an anchor to the head of the list of its siblings
-** ------------------------------------------------------
-**
-** This is to ensure that an anchor which might have already existed
-** is put in the correct order as we load the document.
-*/
-extern void HTAnchor_makeLastChild PARAMS((
- HTChildAnchor * me));
/* Data access functions
** ---------------------
diff -u -p -r LYNX2-8-.590/www/library/implemen/htanchor.c
LYNX2-8-/www/library/implemen/htanchor.c
--- LYNX2-8-.590/www/library/implemen/htanchor.c Sun Oct 6 17:43:28 2002
+++ LYNX2-8-/www/library/implemen/htanchor.c Sun Oct 13 17:25:30 2002
@@ -154,12 +154,6 @@ PRIVATE BOOL HTIdentical ARGS2(
CONST char *, t)
{
if (s && t) { /* Make sure they point to something */
-#ifdef SH_EX /* 1998/04/28 (Tue) 22:02:58 */
- if (*s == 'P' || *t == 'P') {
- if (strcmp(s + 1, "Name") == 0 || strcmp(t + 1, "Name") == 0)
- return NO;
- }
-#endif
for (; *s && *t; s++, t++) {
if (*s != *t) {
return(NO);
@@ -172,6 +166,35 @@ PRIVATE BOOL HTIdentical ARGS2(
}
#endif /* CASE_INSENSITIVE_ANCHORS */
+/*
+ * Three-way compare function
+ */
+PRIVATE int compare ARGS2(
+ HTChildAnchor *, l,
+ HTChildAnchor *, r)
+{
+ CONST char* a = l->tag;
+ CONST char* b = r->tag;
+
+ if (a && b) { /* Make sure they point to something */
+#ifdef CASE_INSENSITIVE_ANCHORS
+ return strcasecomp(a, b); /* Case insensitive */
+#else
+ return strcmp(a, b); /* Case sensitive - FM */
+#endif /* CASE_INSENSITIVE_ANCHORS */
+ } else {
+ /* bring up an order when one or both strings are nulls */
+ if (a) {
+ return 1;
+ } else if (b) {
+ return -1;
+ } else {
+ /* both nulls - assume equivalent */
+ return 0;
+ }
+ }
+}
+
/* Create new or find old sub-anchor
** ---------------------------------
@@ -184,32 +207,33 @@ PUBLIC HTChildAnchor * HTAnchor_findChil
CONST char *, tag)
{
HTChildAnchor *child;
- HTList *kids;
if (!parent) {
CTRACE((tfp, "HTAnchor_findChild called with NULL parent.\n"));
return(NULL);
}
- if ((kids = parent->children) != 0) {
- /*
- ** Parent has children. Search them.
- */
- if (tag && *tag) { /* TBL */
- while (NULL != (child=(HTChildAnchor *)HTList_nextObject(kids))) {
-#ifdef CASE_INSENSITIVE_ANCHORS
- if (HTEquivalent(child->tag, tag)) /* Case insensitive */
-#else
- if (HTIdentical(child->tag, tag)) /* Case sensitive - FM */
-#endif /* CASE_INSENSITIVE_ANCHORS */
- {
- CTRACE((tfp, "Child anchor %p of parent %p with name `%s'
already exists.\n",
- (void *)child, (void *)parent, tag));
- return(child);
- }
+
+ if (tag && *tag) { /* TBL */
+ if (parent->children) {
+ /*
+ ** Parent has children. Search them.
+ */
+ HTChildAnchor sample;
+ sample.tag = (char*)tag; /* for compare() only */
+
+ child = (HTChildAnchor *)HTBTree_search(parent->children, &sample);
+ if (child != NULL) {
+ CTRACE((tfp, "Child anchor %p of parent %p with name `%s'
already exists.\n",
+ (void *)child, (void *)parent, tag));
+ return(child);
}
- } /* end if tag is void */
- } else { /* parent doesn't have any children yet : create family */
- parent->children = HTList_new();
+ } else { /* parent doesn't have any children yet : create family */
+ parent->children = HTBTree_new((HTComparer)compare);
+ }
+ } else { /* if tag is void */
+ if (!parent->children_notag)
+ /* create a family of this kind */
+ parent->children_notag = HTList_new();
}
child = HTChildAnchor_new();
@@ -217,9 +241,16 @@ PUBLIC HTChildAnchor * HTAnchor_findChil
(void *)child,
NonNull(tag),
(void *)parent)); /* int for apollo */
- HTList_addObject (parent->children, child);
child->parent = parent;
- StrAllocCopy(child->tag, tag);
+
+ if (tag && *tag) {
+ StrAllocCopy(child->tag, tag); /* should be set before HTBTree_add */
+ HTBTree_add(parent->children, child);
+ } else {
+ child->tag = 0;
+ HTList_addObject(parent->children_notag, child);
+ }
+
return(child);
}
@@ -628,6 +659,7 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
* 05-27-94 Lynx 2-3-1 Garrett Arch Blythe
*/
HTList *cur;
+ HTBTElement *ele;
HTChildAnchor *child;
/*
@@ -668,12 +700,18 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
* Delete all outgoing links from children, do not
* delete the children, though.
*/
- if (!HTList_isEmpty(me->children)) {
- cur = me->children;
+ if (!HTList_isEmpty(me->children_notag)) {
+ cur = me->children_notag;
while ((child = (HTChildAnchor *)HTList_nextObject(cur)) != 0) {
- if (child != NULL) {
- deleteLinks((HTAnchor *)child);
- }
+ deleteLinks((HTAnchor *)child);
+ }
+ }
+ if (me->children) {
+ ele = me->children->top;
+ while (ele != NULL) {
+ child = (HTChildAnchor *)HTBTree_object(ele);
+ deleteLinks((HTAnchor *)child);
+ ele = HTBTree_next(me->children, ele);
}
}
me->underway = FALSE;
@@ -688,26 +726,33 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
* No more incoming links : kill everything
* First, recursively delete children and their links.
*/
- if (!HTList_isEmpty(me->children)) {
+ if (me->children) {
+ ele = me->children->top;
+ while (ele != NULL) {
+ child = (HTChildAnchor *)HTBTree_object(ele);
+ deleteLinks((HTAnchor *)child);
+ FREE(child->tag);
+ ele = HTBTree_next(me->children, ele);
+ }
+ HTBTreeAndObject_free(me->children);
+ me->children = NULL;
+ }
+ /* ...and this kind of children */
+ if (!HTList_isEmpty(me->children_notag)) {
while ((child = (HTChildAnchor *)HTList_removeLastObject(
- me->children)) != 0) {
- if (child) {
- deleteLinks((HTAnchor *)child);
- if (child->tag) {
- FREE(child->tag);
- }
- FREE(child);
- }
+ me->children_notag)) !=
0) {
+ deleteLinks((HTAnchor *)child);
+ FREE(child);
}
}
me->underway = FALSE;
/*
- * Delete our empty list of children.
+ * Delete our empty list of children_notag.
*/
- if (me->children) {
- HTList_delete(me->children);
- me->children = NULL;
+ if (me->children_notag) {
+ HTList_delete(me->children_notag);
+ me->children_notag = NULL;
}
/*
@@ -811,22 +856,6 @@ PUBLIC BOOL HTAnchor_delete ARGS1(
}
-/* Move an anchor to the head of the list of its siblings
-** ------------------------------------------------------
-**
-** This is to ensure that an anchor which might have already existed
-** is put in the correct order as we load the document.
-*/
-PUBLIC void HTAnchor_makeLastChild ARGS1(
- HTChildAnchor *, me)
-{
- if (me->parent != (HTParentAnchor *)me) { /* Make sure it's a child */
- HTList * siblings = me->parent->children;
- HTList_removeObject (siblings, me);
- HTList_addObject (siblings, me);
- }
-}
-
/* Data access functions
** ---------------------
*/
@@ -919,7 +948,7 @@ PUBLIC BOOL HTAnchor_isISMAPScript ARGS1
PUBLIC BOOL HTAnchor_hasChildren ARGS1(
HTParentAnchor *, me)
{
- return (BOOL) ( me ? ! HTList_isEmpty(me->children) : NO);
+ return (BOOL) ( me ? (me->children || !HTList_isEmpty(me->children_notag))
: NO);
}
#if defined(USE_COLOR_STYLE)
diff -u -p -r LYNX2-8-.590/www/library/implemen/htbtree.c
LYNX2-8-/www/library/implemen/htbtree.c
--- LYNX2-8-.590/www/library/implemen/htbtree.c Thu Dec 21 18:44:12 2000
+++ LYNX2-8-/www/library/implemen/htbtree.c Sun Oct 13 17:25:38 2002
@@ -83,6 +83,30 @@ PUBLIC void HTBTreeAndObject_free ARGS1(
}
+PUBLIC void * HTBTree_search ARGS2(
+ HTBTree*, tree,
+ void*, object)
+ /**********************************************************************
+ ** Returns a pointer to equivalent object in a tree or NULL if none.
+ */
+{
+ HTBTElement * cur = tree->top;
+ int res;
+
+ while (cur != NULL)
+ {
+ res = tree->compare(object, cur->object);
+
+ if (res == 0)
+ return cur->object;
+ else if (res < 0)
+ cur = cur->left;
+ else if (res > 0)
+ cur = cur->right;
+ }
+ return NULL;
+}
+
PUBLIC void HTBTree_add ARGS2(
@@ -147,7 +171,8 @@ PUBLIC void HTBTree_add ARGS2(
forefather_of_element = NULL;
while (father_found)
{
- if (tree->compare(object,father_of_element->object)<0)
+ int res = tree->compare(object,father_of_element->object);
+ if (res < 0)
{
if (father_of_element->left != NULL)
father_of_element = father_of_element->left;
@@ -167,8 +192,8 @@ PUBLIC void HTBTree_add ARGS2(
added_element->right_depth = 0;
}
}
- if (tree->compare(object,father_of_element->object)>=0)
- {
+ else /* res >= 0 */
+ {
if (father_of_element->right != NULL)
father_of_element = father_of_element->right;
else
@@ -188,6 +213,7 @@ PUBLIC void HTBTree_add ARGS2(
}
}
}
+
/*
** Changing of all depths that need to be changed
*/
diff -u -p -r LYNX2-8-.590/www/library/implemen/htbtree.h
LYNX2-8-/www/library/implemen/htbtree.h
--- LYNX2-8-.590/www/library/implemen/htbtree.h Wed May 12 19:06:06 1999
+++ LYNX2-8-/www/library/implemen/htbtree.h Sun Oct 13 17:25:40 2002
@@ -76,6 +76,16 @@ extern void HTBTree_add PARAMS((HTBTree*
/*
+Search an object in a binary tree
+
+ returns Pointer to equivalent object in a tree or NULL if none.
+ */
+
+extern void * HTBTree_search PARAMS((HTBTree* tree, void * object));
+
+
+/*
+
Find user object for element
*/
@@ -91,7 +101,7 @@ Find next element in depth-first order
ele if NULL, start with leftmost element. if != 0 give
next object to
the right.
- returns Pointer to element ot NULL if none left.
+ returns Pointer to element or NULL if none left.
*/
extern HTBTElement * HTBTree_next PARAMS((HTBTree* tree, HTBTElement * ele));
; To UNSUBSCRIBE: Send "unsubscribe lynx-dev" to address@hidden
- lynx-dev an optimization for large pages (patch),
Leonid Pauzner <=