/*
Usage example: check < ~/WWWcache/Logs/webcp.2749.lst > REGET

s c--- =200 000 1   17949   17949 268 000001 000000 http://www.ras.ru/cgi-bin/html-KOI/ALPINISM/ [text/html]
s c--- =200 001 1    4371    4371 062 000002 000001 http://www.ras.ru/cgi-bin/html-KOI/ALPINISM/What-s-new [text/html]
s c--- =200 001 1     306     306 002 000003 000001 http://www.ras.ru/cgi-bin/html-KOI/ALPINISM/What-s-updated [text/html]
Columns after the six leading characters, in the order main() reads them:
	code n1 n2    n3      n4  n5    num1   num2     name
*/
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <stdarg.h>
#include <sys/types.h>

/* Prefix a name must start with for main() to report on it. */
char HTTP[] = "http://";

/* One parsed line of the log listing, as filled in by main(). */
struct url {
	char  *name;	/* last field of the line (the URL text), heap copy */
	int    code;	/* number following '='; main() treats 0 as "not received" */
	size_t size;	/* third numeric field after the code (n3) */
	int    number;	/* num1 column */
	int    parent;	/* num2 column */
	char   c;	/* third character of the line */
	int    count;	/* -1 initially; main() resets it to 0 for a reported
			   URL and then counts same-named received URLs */
};
typedef struct url URL;

/* Every entry read so far, and how many slots of the table are in use. */
URL urls[100000];
int nurls = 0;

/* Return a pointer to the terminating '\0' of s. */
char *strend(char *s){
	return s + strlen(s);
}
/*
 * Return a pointer to the last character of s, i.e. the one just before
 * the terminating '\0'.  For an empty string there is no such character
 * and s itself (pointing at the terminator) is returned.
 */
char *strlast(char *s){
	size_t len = strlen(s);

	if(len > 1)
		return s + (len - 1);
	return s;
}

/*
 * Concatenate s1 and all following string arguments into one newly
 * allocated string.
 *
 * The argument list MUST be terminated by a null pointer, written as
 * (char *)NULL at the call site.
 *
 * Returns the malloc'ed result, which the caller must free(), or NULL
 * when memory cannot be obtained.
 *
 * The argument list is walked twice (measure, then copy) instead of
 * being stashed in a fixed-size array, so there is no limit on the
 * number of pieces and nothing to overflow.
 */
char *strspl(char *s1, ...){
	va_list args;
	char *snew, *dst, *next;
	size_t length, n;

	/* Pass 1: total size, including the terminating '\0'. */
	length = strlen(s1) + 1;
	va_start(args, s1);
	while((next = va_arg(args, char *)) != NULL)
		length += strlen(next);
	va_end(args);

	snew = malloc(length);
	if(snew == NULL)
		return NULL;

	/* Pass 2: append each piece at the running end of the output. */
	n = strlen(s1);
	memcpy(snew, s1, n);
	dst = snew + n;

	va_start(args, s1);
	while((next = va_arg(args, char *)) != NULL){
		n = strlen(next);
		memcpy(dst, next, n);
		dst += n;
	}
	va_end(args);

	*dst = '\0';
	return snew;
}
main(){
	char buffer[1024 * 30];

	char c1, c2, c3, c4, c5, chr;
	int code;
	int n1, n2, n3, n4, n5;
	long num1, num2;
	char name[1024 * 20];

	int i, j;
	char *p, *q;

	while(gets(buffer) != NULL){
		sscanf(buffer, "%c%c%c%c%c%c =%03d %03d %0d %7u %7u %03d %06lu %06lu %s",
			  &c1,
			  &chr,
			  &c2, &c3, &c4, &c5,
					     &code,
						   &n1, &n2, &n3, &n4, &n5,
									 &num1, &num2, name);

		urls[nurls].name = strdup(name);
		urls[nurls].code = code;
		urls[nurls].size = n3;
		urls[nurls].number = num1;
		urls[nurls].parent = num2;
		urls[nurls].c      = c2;
		urls[nurls].count  = (-1);
		nurls++;
	}
	printf("%d URLs\n", nurls);

	printf("\t\t\t*** URLs not received ***\n");
	for(i=0; i < nurls; i++){
		if(strncmp(urls[i].name, HTTP, strlen(HTTP)) != 0)
			continue;

		if(strchr("IS", urls[i].c))
			continue;
		if(urls[i].code == 0){
			if(*strlast(urls[i].name) == '/')
				continue;

			p = strrchr(urls[i].name, '/');
			p++;

		/* SKIP SECTION ###################################### */
			if(!strcmp(p, "internal-gopher-unknown"))
				continue;
			if(!strcmp(p, "internal-gopher-menu"))
				continue;
		/* SKIP SECTION ###################################### */

			printf("%s\n", urls[i].name);
			urls[i].count = 0;

			for(j=0; j < nurls; j++){
				if(urls[j].code == 0)
					continue;
				if(strchr("IS", urls[j].c))
					continue;

				q = strrchr(urls[j].name, '/');
				q++;

				if( !strcmp(p, q)){
					printf("\t%07d %s\n", urls[j].size, urls[j].name);
					urls[i].count++;
				}
			}

		}
	}
	for(i=0; i < nurls; i++){
		if(urls[i].count == 0)
			printf("MUST GET %s\n", urls[i].name);
	}
}
