ignored in LaTeX.\n",
html_fn, ln, label_name);
}
else
print_label(fout, in_file->nr, label_name,
FALSE);
active_label = TRUE;
strcpy(label_name, name);
}
if (a_href && !no_copy && href_status == REF_OKAY)
{ if (active_href && freport != NULL)
fprintf(freport,
"%s (%d) : nested href\n", html_fn, ln);
active_href = TRUE;
atext_len = 0;
}
LATEX_OPEN_C(tagnr, a_href ? C_YES : C_OPT);
}
else if (CLOSE_TAG(T_A))
{ if (active_href)
{ atext[atext_len] = '\0';
deter_kind_of_URL(dest_file);
if (is_html_fn)
href_included = TRUE;
else if (no_copy || in_verb || in_e_alltt || in_e_header)
href_included = FALSE;
else
{ href_included = TRUE;
switch(kind_of_URL)
{ case URL_mailto:
DEBUG_P2("mailto:%s %s\n", email, atext);
if (strstr(atext, email))
href_included = !opt_dni_email;
break;
case URL_news:
DEBUG_P2("news:%s %s\n", newsgroup, atext);
if (strstr(atext, newsgroup))
href_included = !opt_dni_news;
break;
case URL_ftp:
if ( strstr(atext, ftp_site)
&& ( ftp_dir[0] == '\0'
|| strstr(atext, ftp_dir))
&& ( ftp_file[0] == '\0'
|| strstr(atext, ftp_file)))
href_included = !opt_dni_ftp;
break;
case URL_other:
if (strstr(atext, dest_file))
href_included = !opt_dni_other;
break;
}
}
if (last_ref != NULL)
last_ref->copied = href_included;
}
LATEX_CLOSES(tagnr)
}
else if (OPEN_TAG(T_VERB))
LATEX_OPEN(tagnr)
else if (CLOSE_TAG(T_VERB))
LATEX_CLOSES_K()
else if (OPEN_TAG(T_IMG))
{ if (a_src)
if (!norm_URL(html_fn, src))
{ if (freport != NULL)
fprintf(freport,
"%s (%d) : URL `%s' illegal or too long.\n",
html_fn, ln, src);
}
else
{
file_t *src_file = find_file(src) ;
if (first)
src_file->nr_refs++;
else if (!is_URL(src) && !src_file->exists)
{
if ( freport != NULL
&& ( option_warn
|| src_file->nr_refs < 2))
fprintf(freport,
"%s (%d) : file `%s' does not exist.\n",
html_fn, ln, src);
}
else if (fout && !no_copy)
{
if (tags[H_IMG].latex_open[0] != '\0')
fprintf(fout,"%s%s%s",
tags[H_IMG].latex_open, src,
tags[H_IMG].latex_close);
else
{ char *s = src + strlen(src);
while (s > src && *(s - 1) != '/')
s--;
print_f_img(fout, src, s, alt, REF_FILE_NOT_INC,
in_header > 0,
freport, html_fn, ln);
/* maybe not always fragil environment */
}
}
}
}
else if (OPEN_TAG(T_P))
{ if (stack[stack_depth-1].tagkind == T_P)
LATEX_CLOSES(H_P);
if (in_header && freport != NULL && option_info)
fprintf(freport, "%s (%d) : ignored in header\n",
html_fn, ln);
LATEX_OPEN(tagnr)
}
else if (CLOSE_TAG(T_P))
LATEX_CLOSES(tagnr)
else if (OPEN_TAG(T_CHAR))
{ int j;
bool found = FALSE;
for (j = stack_depth - 1; j >= 0; j--)
if (stack[j].tagnr == tagnr)
{ found = TRUE;
break;
}
if (found && j == stack_depth - 1)
{ REPLACE_TAG();
LATEX_CLOSES(tagnr)
}
else
{ if (found && freport != NULL)
fprintf(freport, "%s (%d) : nested <%s>\n",
html_fn, ln, html_com);
LATEX_OPEN(tagnr)
}
}
else if (CLOSE_TAG(T_CHAR))
LATEX_CLOSES(tagnr)
else
{ if (freport != NULL)
fprintf(freport, "%s (%d) : tag %s not processed\n",
html_fn, ln, tags[tagnr].name);
}
/*** indention +4 ***/
}
/*** indention +4 ***/
}
/* read > and skip till first non-space */
if (!feof(fin) && ch == '>')
{ if ((ch = (char)fgetc(fin)) == '\n') ln++;
if ( active_href && (ch == ' ' || ch == '\n' || ch == '\t')
&& atext_len < MAX_AT)
atext[atext_len++] = ' ';
while (!feof(fin) && (ch == ' ' || ch == '\n' || ch == '\t'))
{ if (fout != NULL && !no_copy)
if (ch == '\n')
{ if (in_verb || in_alltt)
{ DEBUG_GN('V');
fputc('\n', fout);
}
else if (!empty_line && !in_desc_label)
{ DEBUG_GN('T');
gen_newline = TRUE;
}
}
else
{ if (gen_newline)
{ DEBUG_GN('2');
fputc('\n', fout);
gen_newline = FALSE;
empty_line = TRUE;
}
fputc(ch, fout);
}
if ((ch = (char)fgetc(fin)) == '\n') ln++;
}
}
}
else
{ if (active_label && !in_header && after_first)
{ if (fout != NULL)
print_label(fout, in_file->nr, label_name, FALSE);
active_label = FALSE;
}
if (ch == '\n')
{ if (in_verb || in_alltt)
{ DEBUG_GN('V');
if (fout != NULL && !no_copy)
fputc('\n', fout);
}
else if (!empty_line && !in_desc_label)
{ DEBUG_GN('T');
gen_newline = TRUE;
}
if ((ch = (char)fgetc(fin)) == '\n') ln++;
}
else
{ int i = 0;
if (no_item)
{ if (fout != NULL && !no_copy)
fprintf(fout, opt_empty_item);
no_item = FALSE;
}
if ( gen_newline && fout != NULL
&& ( (!empty_line && !in_desc_label)
|| in_verb || in_alltt))
{ fputc('\n', fout);
gen_newline = FALSE;
empty_line = TRUE;
}
while (!feof(fin) && ch != '<' && ch != '\n')
{ if (ch != ' ' && ch != '\t' && !no_copy)
{ empty_line = FALSE;
empty_item = FALSE;
}
add_to_comment(ch, &i);
ch = (char)fgetc(fin);
if (active_href && atext_len < MAX_AT)
atext[atext_len++] = ch;
}
add_to_comment('\0', &i);
if (no_header && !no_copy && !empty_line && fout != NULL)
print_sp(fout, opt_no_header, html_fn);
print_str(no_copy ? NULL : fout, comment, freport, html_fn,
ln, in_math, in_verb, in_alltt);
if (ch == '\n') ln++;
}
}
}
/* In case no header occured, dump label */
close_header(fout, html_fn);
while (stack_depth > 0)
{ if ( freport != NULL
&& (!option_pedantic || stack[stack_depth-1].tagnr != T_HTML))
fprintf(freport, "%s (%d) : add %s>.\n",
html_fn, ln,
tags[stack[stack_depth-1].tagnr].name);
latex_close(fout, freport, ln, html_fn);
}
fclose(fin);
if (fout != NULL)
fprintf(fout, "\n%% html: End of file: `%s'\n", html_fn);
}
/*
 * check_exists_file - confirm that a referenced file can be opened and
 * flag it as handled.
 *
 * html_fn  name of the file; also used as the key for find_file().
 * freport  report stream, or NULL to suppress reporting.
 *
 * If the file's entry is currently marked as existing, that is verified
 * by trying to open the file for reading; a failed open clears the
 * `exists` flag.  A non-existing file is reported on freport when it is
 * referenced more than once or when option_warn is set.  In all cases
 * the R_READ bit is set in the entry's `read` field.
 */
void check_exists_file(char *html_fn, FILE *freport)
{
    file_t *entry = find_file(html_fn);

    /* Only probe the file system for entries still believed to exist. */
    if (entry->exists) {
        FILE *probe = fopen(html_fn, "r");

        if (probe != NULL)
            fclose(probe);
        else
            entry->exists = FALSE;
    }

    if (!entry->exists && freport != NULL
        && (entry->nr_refs > 1 || option_warn))
        fprintf(freport, "%s (0) : does not exist\n", html_fn);

    entry->read |= R_READ;
}
/*
 * accept_root_URL - record URL as the root URL of the document.
 *
 * URL      root URL, expected to look like "<scheme>://<host>[/<path>]".
 *          NOTE: when the URL is accepted it is truncated IN PLACE
 *          directly after the host part.
 * fout     output stream that receives a "%html:" remark when the URL is
 *          rejected, or NULL.
 * freport  report stream that receives a message when the URL is
 *          rejected, or NULL.
 *
 * On success the following globals are set:
 *   document_URL  freshly allocated copy of URL, always ending in '/'
 *   server_URL    freshly allocated "<scheme>://<host>" part
 *   file_URL      path part: points into document_URL at the '/' that
 *                 follows the host, or to the literal "/" when URL has
 *                 no path
 * On failure (no "://" found) server_URL is NULL and document_URL is
 * reset to NULL after the URL has been reported.
 */
void accept_root_URL(char *URL, FILE *fout, FILE *freport)
{
    int strlen_URL = (int)strlen(URL);
    int i;

    /* Two extra bytes: one for an appended '/', one for the terminator. */
    document_URL = NALLOC(char, strlen_URL + 2);
    strcpy(document_URL, URL);

    /*
     * Make sure document_URL ends in '/'.  The length test keeps an
     * empty URL from indexing document_URL[-1].
     */
    if (strlen_URL == 0 || document_URL[strlen_URL - 1] != '/') {
        document_URL[strlen_URL] = '/';
        document_URL[strlen_URL + 1] = '\0';
    }

    server_URL = NULL;

    /* Find the ':' that ends the scheme and require "//" right after it. */
    for (i = 0; URL[i] != '\0' && URL[i] != ':'; i++)
        ;
    if (URL[i] != '\0' && URL[i + 1] == '/' && URL[i + 2] == '/') {
        /* Skip "://" and then the host name. */
        i += 3;
        while (URL[i] != '\0' && URL[i] != '/')
            i++;

        if (URL[i] == '/')
            file_URL = document_URL + i;
        else
            file_URL = "/";

        /* Cut the caller's string after the host to obtain the server. */
        URL[i] = '\0';
        server_URL = SALLOC(URL);
        strcpy(server_URL, URL);
    }

    if (server_URL == NULL) {
        if (fout != NULL)
            fprintf(fout, "%%html: illegal URL `%s'\n", document_URL);
        if (freport != NULL)
            fprintf(freport, " : illegal URL `%s'\n", document_URL);
        document_URL = NULL;
    }
}
void scan_input_file(FILE *fin, FILE *fout, FILE *freport, bool first,
                     char *fn);

/* Advance *ch over a run of spaces; stops at EOF or the first non-space. */
static void sif_skip_spaces(FILE *fin, char *ch)
{
    while (!feof(fin) && *ch == ' ')
        *ch = (char)fgetc(fin);
}

/*
 * Store the characters from *ch up to (not including) the next space,
 * newline or EOF in word.  At most max_len characters are kept, the rest
 * of an over-long word is consumed but dropped; word must have room for
 * max_len + 1 bytes and is always terminated.
 */
static void sif_read_word(FILE *fin, char *ch, char *word, int max_len)
{
    int len = 0;

    while (!feof(fin) && *ch != ' ' && *ch != '\n') {
        if (len < max_len)
            word[len++] = *ch;
        *ch = (char)fgetc(fin);
    }
    word[len] = '\0';
}

/*
 * Pass the characters from *ch up to (not including) the next newline or
 * EOF to add_to_comment(), followed by a terminating '\0'.
 */
static void sif_read_rest_of_line(FILE *fin, char *ch)
{
    int len = 0;

    while (!feof(fin) && *ch != '\n') {
        add_to_comment(*ch, &len);
        *ch = (char)fgetc(fin);
    }
    add_to_comment('\0', &len);
}

/*
 * scan_input_file - process a (non-HTML) input file line by line.
 *
 * fin      input stream, positioned at the start of the file.
 * fout     output stream, or NULL when no output is to be produced.
 * freport  report stream, or NULL.
 * first    passed on to scan_a_file(); when true, "-d" definitions and
 *          "-s" styles are read but not processed.
 * fn       name of the input file, used for messages.
 *
 * A line whose first word is "%html" is a directive:
 *   %html -r <URL>               set the root URL (accept_root_URL)
 *   %html -m <prefix> <replace>  add a mapping
 *   %html -l <prefix> <replace>  add a mapping
 *   %html -b                     enable / print the bibliography
 *   %html -d <text>              definition  (process_definition)
 *   %html -o <text>              option      (process_option)
 *   %html -s <style>             style, lower-cased (process_style)
 *   %html -i <file>              mark <file> as ignored
 *   %html <file> [<digit>]       scan an HTML file, optional depth digit
 * The remainder of a directive line is skipped.  Every other line is
 * copied unchanged to fout (when fout is not NULL).
 */
void scan_input_file(FILE *fin, FILE *fout, FILE *freport, bool first,
                     char *fn)
{
    char ch;
    char keyword[7];
    int ln = 1;

    DEBUG_P1("scan_input_file(,,,,%s)\n", fn);
    ch = (char)fgetc(fin);
    while (!feof(fin)) {
        int i;

        /* Collect at most six leading characters: enough to tell "%html"
           followed by a space/newline apart from any longer word. */
        for (i = 0; i < 6 && !feof(fin) && ch != '\n' && ch != ' '; i++) {
            keyword[i] = ch;
            ch = (char)fgetc(fin);
        }
        keyword[i] = '\0';

        if (streq(keyword, "%html")) {
            char html_file[MAX_SF + 1];
            int depth;

            sif_skip_spaces(fin, &ch);
            if (ch == '-') {
                char word[MAX_SF + 1];

                ch = (char)fgetc(fin);
                if (ch == 'r') {
                    ch = (char)fgetc(fin);
                    sif_skip_spaces(fin, &ch);
                    sif_read_word(fin, &ch, word, MAX_SF);
                    accept_root_URL(word, fout, freport);
                }
                else if (ch == 'm' || ch == 'l') {
                    mapping_t *mapping = ALLOC(mapping_t);
                    bool is_link_mapping = ch == 'l';

                    ch = (char)fgetc(fin);
                    sif_skip_spaces(fin, &ch);
                    sif_read_word(fin, &ch, word, MAX_SF);
                    mapping->prefix = SALLOC(word);
                    strcpy(mapping->prefix, word);

                    sif_skip_spaces(fin, &ch);
                    sif_read_word(fin, &ch, word, MAX_SF);
                    mapping->replace = SALLOC(word);
                    strcpy(mapping->replace, word);

                    /* A mapping without prefix is not linked into a list. */
                    if (mapping->prefix[0] != '\0') {
                        /* NOTE(review): "-l" feeds final_mappings and "-m"
                           feeds link_mappings; kept exactly as found --
                           confirm that this is not inverted. */
                        if (is_link_mapping) {
                            mapping->next = final_mappings;
                            final_mappings = mapping;
                        }
                        else {
                            mapping->next = link_mappings;
                            link_mappings = mapping;
                        }
                    }
                }
                else if (ch == 'b') {
                    option_bibliography = TRUE;
                    if (fout != NULL)
                        print_bibliography(fout);
                }
                else if (ch == 'd') {
                    ch = (char)fgetc(fin);
                    /* Stops at EOF too: a last line without newline must
                       not loop forever. */
                    sif_read_rest_of_line(fin, &ch);
                    if (!first)
                        process_definition(freport, comment, fn, ln, FALSE);
                }
                else if (ch == 'o') {
                    ch = (char)fgetc(fin);
                    sif_read_rest_of_line(fin, &ch);
                    process_option(freport, comment, fn, ln, FALSE);
                }
                else if (ch == 's') {
                    int len = 0;
                    char style[21];

                    ch = (char)fgetc(fin);
                    sif_skip_spaces(fin, &ch);
                    while (!feof(fin) && ch != '\n' && ch != ' ') {
                        if (len < 20)
                            style[len++] = (char)tolower((unsigned char)ch);
                        ch = (char)fgetc(fin);
                    }
                    style[len] = '\0';
                    if (!first)
                        process_style(freport, style, fn, ln);
                }
                else if (ch == 'i') {
                    file_t *tfile;

                    ch = (char)fgetc(fin);
                    sif_skip_spaces(fin, &ch);
                    sif_read_word(fin, &ch, html_file, MAX_SF);
                    tfile = find_file(html_file);
                    tfile->ignore = TRUE;
                }
                else {
                    if (fout != NULL)
                        fprintf(fout, "%%html: unknown option -%c\n", ch);
                    if (freport != NULL)
                        fprintf(freport, " : unknown option -%c\n", ch);
                }
            }
            else {
                /* "%html <file> [<digit>]": scan an HTML file. */
                sif_read_word(fin, &ch, html_file, MAX_SF);
                sif_skip_spaces(fin, &ch);
                depth = 1;
                if (isdigit((unsigned char)ch))
                    depth = ch - '0';
                scan_a_file(html_file, fout, freport, depth, first, TRUE);
            }

            /* Discard whatever is left on the directive line. */
            while (!feof(fin) && ch != '\n')
                ch = (char)fgetc(fin);
            if (!feof(fin) && ch == '\n')
                ch = (char)fgetc(fin);
        }
        else {
            /* Ordinary line: echo the part already consumed, then the
               rest of the line including its newline. */
            if (fout != NULL)
                fprintf(fout, "%s", keyword);
            while (!feof(fin) && ch != '\n') {
                if (fout != NULL)
                    fputc(ch, fout);
                ch = (char)fgetc(fin);
            }
            if (!feof(fin) && ch == '\n') {
                if (fout != NULL)
                    fputc(ch, fout);
                ch = (char)fgetc(fin);
            }
        }
        ln++;
    }
}
/*
 * scan_not_included_files - visit files from the global the_files list
 * outside of the main scan.
 *
 * freport   report stream handed to the scanners, may be NULL.
 * first     selects the mode, see below; also passed on to scan_a_file().
 * included  passed on to scan_a_file() in the first mode only.
 *
 * first mode (first is true):
 *   Repeatedly takes the first list entry that exists, does not have
 *   R_READ set, is not a URL and whose name does not start with "..".
 *   HTML files are scanned with scan_a_file(), all others are handed to
 *   check_exists_file().  The search restarts at the head of the list
 *   after every file and ends when no such entry is left.
 *   (check_exists_file() sets R_READ; presumably scan_a_file() does so
 *   as well -- otherwise this would not terminate.)
 *
 * other mode:
 *   Walks the list once and scans every HTML file that is not a URL and
 *   whose name does not start with "..", passing FALSE for `included`.
 *   A test on R_INCLUDED used to be part of this condition but is
 *   disabled.
 */
void scan_not_included_files(freport, first, included)
FILE *freport; bool first, included;
{
    file_t *entry;

    if (!first) {
        for (entry = the_files; entry != NULL; entry = entry->next) {
            if (is_URL(entry->name))
                continue;
            if (!is_html(entry->name))
                continue;
            if (entry->name[0] == '.' && entry->name[1] == '.')
                continue;
            scan_a_file(entry->name, NULL, freport, 1, first, FALSE);
        }
        return;
    }

    for (;;) {
        /* Look for the first entry that still needs to be visited. */
        for (entry = the_files; entry != NULL; entry = entry->next) {
            DEBUG_P4("%s %d %d %d ",
                     entry->name, entry->exists, !(entry->read & R_READ),
                     !is_URL(entry->name));
            DEBUG_P3("%d %d %d\n",
                     is_html(entry->name),
                     entry->name[0] != '.', entry->name[1] != '.');
            if (   entry->exists && !(entry->read & R_READ)
                && !is_URL(entry->name)
                && (entry->name[0] != '.' || entry->name[1] != '.'))
                break;
        }
        if (entry == NULL)
            break;

        if (is_html(entry->name))
            scan_a_file(entry->name, NULL, freport, 1, first, included);
        else
            check_exists_file(entry->name, freport);
    }
}
/*
 * main - command line driver.
 *
 * Parses the options, then makes two passes over the input:
 *   1. a silent pass (no output, no report) with `first` set,
 *   2. after clearing every file's R_READ bit and calling make_by_refs()
 *      and fill_re_names(): either a generating pass that writes the
 *      output file and reports to stdout, or -- in check mode -- a pass
 *      that only reports to stdout.
 * An input name ending in ".html" is scanned directly as an HTML file;
 * any other input is read with scan_input_file().  The ".html" suffix is
 * stripped from the name (in place) to form the base name for the
 * default "<base>.tex" output file and the "<base>.ref" cross-reference
 * file.
 *
 * Returns 0 on success, 1 on a usage error or when the input or output
 * file cannot be opened.
 */
int main(int argc, char **argv)
{
    FILE *fin = NULL,
         *fout,
         *fref = NULL;
    char *fn = NULL,
         *html_fn = NULL,
         *outfn = NULL,
         *reffn;
    bool option_chk = streq( argv[0], "chkhtml" ),
         option_scan_not_inc = FALSE,
         option_cross_ref = FALSE;

    /* global options */
    option_info = FALSE;
    option_warn = FALSE;
    option_pedantic = FALSE;
    option_bibliography = FALSE;

    printf("%s: Version %s\nWritten by %s\n\n",
           option_chk ? "chkhtml" : "html2tex", VERSION, WRITTEN_BY);
    calculate_all_el();

    {   int i;
        bool error = FALSE;

        for (i = 1; i < argc; i++)
        {   if (argv[i][0] == '-')
            {   if (argv[i][1] == 'o')
                {   /* argument either attached (-ofile) or separate */
                    if (argv[i][2] != '\0')
                        outfn = argv[i] + 2;
                    else if (i + 1 < argc)
                        outfn = argv[++i];
                    else
                        printf("Argument of -o option missing\n");
                }
                else if (argv[i][1] == 'i' && argv[i][2] == '\0')
                {   option_warn = TRUE;
                    option_info = TRUE;
                }
                else if (argv[i][1] == 'w' && argv[i][2] == '\0')
                    option_warn = TRUE;
                else if (argv[i][1] == 'p' && argv[i][2] == '\0')
                    option_pedantic = TRUE;
                else if (argv[i][1] == 's' && argv[i][2] == '\0')
                    option_scan_not_inc = TRUE;
                else if (argv[i][1] == 'c' && argv[i][2] == '\0')
                    option_chk = TRUE;
                else if (argv[i][1] == 'r')
                {   if (argv[i][2] != '\0')
                        accept_root_URL(argv[i] + 2, stdout, NULL);
                    else if (i + 1 < argc)
                        accept_root_URL(argv[++i], stdout, NULL);
                    else
                        printf("Argument of -r option missing\n");
                }
                else if (argv[i][1] == 'b' && argv[i][2] == '\0')
                    option_bibliography = TRUE;
#ifdef DYN_DEBUG
                else if (argv[i][1] == 'd')
                    option_debug = TRUE;
#endif
                else if (streq(argv[i], "-cr"))
                    option_cross_ref = TRUE;
                else
                {   printf("Unknown option %s\n", argv[i]);
                    error = TRUE;
                }
            }
            else if (fn == NULL)
                fn = argv[i];
            else
            {   printf("Too many input filenames\n");
                error = TRUE;
            }
        }

        if (fn == NULL)
        {   printf("No input filename given\n");
            error = TRUE;
        }
        else
        {   fin = fopen(fn, "r");
            if (fin == NULL)
            {   printf("Error: Cannot open file: `%s'.\n", fn);
                error = TRUE;
            }
        }

        if (error)
        {   printf("Usages: html2tex [options] \n");
            printf("\nOptions:\n");
            printf(" -o : specify output file\n");
            /* -i switches on both info and warnings, -w warnings only */
            printf(" -i : print info (and warnings)\n");
            printf(" -w : print warnings\n");
            printf(" -r : root URL of document\n");
            printf(" -b : make bibliography\n");
            printf(" -cr : generate cross-reference\n");
            printf(" -c : check html file\n");
            printf(" -s : scan not included files\n");
#ifdef DYN_DEBUG
            printf(" -d : print (a lot of) debugging information\n");
#endif
            return 1;
        }

        {   size_t fn_len = strlen(fn);

            /* The length test keeps names shorter than ".html" from
               being compared at an address before the string. */
            if (fn_len >= 5 && streq(fn + fn_len - 5, ".html"))
            {   is_html_fn = TRUE;
                html_fn = SALLOC(fn);
                strcpy(html_fn, fn);
                fn[fn_len - 5] = '\0';      /* fn becomes the base name */
            }
        }
    }

    /* room for fn, ".ref" and the terminator */
    reffn = NALLOC(char, strlen(fn) + 5);
    strcpy(reffn, fn);
    strcat(reffn, ".ref");
    if (option_cross_ref)
    {   fref = fopen(reffn, "w");
        if (fref == NULL)
            printf("Error: Cannot open file: %s.\n", reffn);
    }

    /* First pass: silent. */
    init_opts();
    if (!is_html_fn)
        scan_input_file(fin, (FILE *)NULL, (FILE *)NULL, TRUE, fn);
    else
        scan_a_file(html_fn, (FILE *)NULL, (FILE *)NULL, 0, TRUE, TRUE);
    if (option_scan_not_inc)
        scan_not_included_files(NULL, TRUE, is_html_fn);

    /* Forget which files were read so the second pass visits them again. */
    {   file_t *file;
        for (file = the_files; file != NULL; file = file->next)
            file->read &= ~R_READ;
    }
    make_by_refs();
    fill_re_names();

    /* Second pass. */
    rewind(fin);
    init_opts();
    if (!option_chk)
    {
        if (outfn == NULL)
        {   outfn = NALLOC(char, strlen(fn) + 5);
            strcpy(outfn, fn);
            strcat(outfn, ".tex");
        }
        fout = fopen(outfn, "w");
        if (fout == NULL)
        {   printf("Error: Cannot open file: %s.", outfn);
            return 1;
        }
        if (!is_html_fn)
            scan_input_file(fin, fout, stdout, FALSE, fn);
        else
            scan_a_file(html_fn, fout, stdout, 1, FALSE, TRUE);
    }
    else if (is_html_fn)
        scan_a_file(html_fn, (FILE *)NULL, stdout, 0, TRUE, TRUE);
    else
        /* html_fn is only set for ".html" input; any other input file is
           checked through scan_input_file(), as in the first pass. */
        scan_input_file(fin, (FILE *)NULL, stdout, TRUE, fn);

    if (option_scan_not_inc)
        scan_not_included_files(stdout, FALSE, is_html_fn);

    DEBUG_PRINT(("ready reading\n"));
    /* print_files(stdout);*/
    DEBUG_PRINT(("\n\n\n"));
    /* errors_files(stdout); */
    if (fref)
    {   errors_files(fref);
        fprintf(fref, "\n----------------\n\n");
        print_files(fref);
    }
    return 0;
}