It was failing on adobe symbol font entries.
Created a simple html to text program.
The number of html entries in the data lines is just enough entries to get the my html
conversion accurate.
You might want to add more html entries & ascii codes to the data statements.
Does not handle tables.
Not tested on html list. (<ul>,<ol>,<li>,<dl>)
Code: Select all
'----------------------------------------------------------------------------------
'
' Html2ascii
' By Randall Glass
' Created 2018
'
'
' Donated to the public domain
'
'
'----------------------------------------------------------------------------------
#LANG "fblite"
TYPE DWORD AS UINTEGER
#INCLUDE "WINDOWS.BI"
Declare Function Replace$ (Byval StrEx as String, Byval StrMask as String, Byval StrRplce as String)
DECLARE SUB EatTags (Html$,NewAscii$)
DIM Entry$()
DIM ASCII$()
Stripped$ = ""
'Html$ = "<P CLASS=" + $DQ + "western" + $DQ + " STYLE=" + $DQ + "margin-bottom: 0in" + $DQ + "><FONT SIZE=2 STYLE=" + $DQ + "font-size: 11pt" + $DQ + "> </SPAN></FONT>" + "D. 20,000"
' Command line format
' Html2Ascii HtmlFile, AsciiFile
HtmlIn$ = "element3.html" ' Command$(1)
AsciiOut$ = "Elem3.txt" ' Command$(2)
HtmlIn$ = Command$(1)
AsciiOut$ = Command$(2)
OPEN HtmlIn$ FOR BINARY AS #1
OPEN AsciiOut$ FOR BINARY AS #2
Html$ = String(LOF(1), 0)
Get #1,, Html$
CLOSE #1
Html$ = Replace$(Html$,CHR$(13,10)," ")
Html$ = Replace$(Html$,"<P ",CHR$(13,10) + "<P ")
Html$ = Replace$(Html$,"<p ",CHR$(13,10) + "<p ")
Html$ = Replace$(Html$,"<P>",CHR$(13,10))
Html$ = Replace$(Html$,"<p>",CHR$(13,10))
Html$ = Replace$(Html$,"<BR>",CHR$(13,10))
Html$ = Replace$(Html$,"<br>",CHR$(13,10))
#IF 1
Html$ = Replace$(Html$,"<LI>" ,SPACE$(9) + CHR$(7) + " ")
Html$ = Replace$(Html$,"<li>" ,SPACE$(9) + CHR$(7) + " ")
Html$ = Replace$(Html$,"<DT>",SPACE$(9) + CHR$(7) + " ")
Html$ = Replace$(Html$,"<dt>" ,SPACE$(9) + CHR$(7) + " ")
#ENDIF
DO
Entries% = Entries% + 1
REDIM PRESERVE Entry$(Entries%)
REDIM PRESERVE ASCII$(Entries%)
READ Entry$(Entries%),ASCII$(Entries%)
'MessageBox 0, Entry$(Entries%)+ " " + ASCII$(Entries%), "Reading Data",0
LOOP UNTIL Entry$(Entries%) = ""
FOR I% = 1 TO Entries% -1
IF ASCII$(I%) = "" THEN
Html$ = Replace$( Html$,Entry$(I%),"")
ELSE
Char% = VAL(ASCII$(I%))
Html$ = Replace$(Html$,Entry$(I%),CHR$(Char%))
END IF
NEXT I%
EatTags Html$,Stripped$
Put #2,, Stripped$
CLOSE
END
' For symbol entries see
' https://stackoverflow.com/questions/8240030/how-to-convert-symbol-font-to-standard-
' utf8-html-entity
' http://www.fileformat.info/info/unicode/font/symbol/nonunicode.htm
' https://www.stat.auckland.ac.nz/~paul/R/CM/AdobeSym.html
DATA "", "230"
DATA "", "227"
DATA "", "248"
DATA "", "241"
DATA "", ""
DATA "–", "45"
DATA "’", "39"
DATA "“", "34"
DATA "”", "34"
DATA "&", "38"
DATA "± ", "241"
DATA "μ", "77"
DATA """, "34"
DATA "µ", "230"
DATA "√", "251"
DATA "¼", "172"
DATA "<","60"
DATA ">","62"
DATA " ",""
DATA "—","45"
DATA "­",""
DATA "",""
SUB EatTags (Html$,NewAscii$)
InTag% = 0
Ln& = LEN(Html$)
StringChar$ = ""
CharPos& = INSTR(Html$,"<BODY") -1
DO
CharPos& = CharPos& + 1
StringChar$ = MID$(Html$,CharPos&,1)
IF StringChar$ = "<" THEN
InTag% = 1
CONTINUE DO
END IF
IF StringChar$ = ">" THEN
InTag% = 0
CONTINUE DO
END IF
IF InTag% = 0 THEN
NewAscii$ = NewAscii$ + StringChar$
END IF
LOOP UNTIL CharPos& >= Ln&
EXIT SUB
END SUB
'-------------------------------------------------------------------------------------------------------------------
'
' Replace$ Orginally named StrReplace by Eternal_Pain
'
' Url page is at:
' https://www.freebasic-portal.de/code-beispiele/string-funktionen/strreplace-59.html
'
'
'------------------------------------------------------------------------------------------------------------------
Function Replace$ (Byval StrEx as String, _
Byval StrMask as String, _
Byval StrRplce as String)
If Len(StrEx)=0 or Len(StrMask)>Len(StrEx) Then Return StrEx
Dim Buffer as String=StrEx
Dim MaskSearch as UInteger
Dim MFound as byte
Dim lp as UInteger=1
Do
MaskSearch=InStr(lp,Buffer,StrMask)
MFound=0
If MaskSearch Then
MFound=1:lp=MaskSearch+Len(StrRplce)
''
Buffer=Left(Buffer,MaskSearch-1)+ _
StrRplce+ _
Right(Buffer,Len(Buffer)-(MaskSearch+(Len(StrMask)-1)))
''
End If
Loop while MFound=1
Return Buffer
End Function
'-----------------------------------------------------------------------------'