Html2Ascii: Pale Moon can't do this!

Post your FreeBASIC source, examples, tips and tricks here. Please don’t post code without including an explanation.
Post Reply
lassar
Posts: 306
Joined: Jan 17, 2006 1:35

Html2Ascii: Pale Moon can't do this!

Post by lassar »

Was trying to get pale moon to save html as text.

It was failing on adobe symbol font entries.

Created a simple html to text program.

The number of html entries in the data lines is just enough entries to get the my html
conversion accurate.

You might want to add more html entries & ascii codes to the data statements.

Does not handle tables.
Not tested on html list. (<ul>,<ol>,<li>,<dl>)

Code: Select all

'----------------------------------------------------------------------------------
'
'                                         Html2ascii 
'                                     By Randall Glass
'                                        Created 2018
'
'
'                               Donated to the public domain
'
'
'----------------------------------------------------------------------------------

#LANG "fblite"

TYPE DWORD AS UINTEGER
#INCLUDE "WINDOWS.BI"

Declare Function Replace$ (Byval StrEx as String, Byval StrMask as String, Byval StrRplce as String)
DECLARE SUB EatTags (Html$,NewAscii$)



DIM Entry$()
DIM ASCII$()

Stripped$ = ""

'Html$ = "<P CLASS=" + $DQ +  "western" + $DQ + " STYLE=" + $DQ + "margin-bottom: 0in" + $DQ + "><FONT SIZE=2 STYLE=" + $DQ + "font-size: 11pt" + $DQ + "> </SPAN></FONT>" + "D.   20,000"

' Command line format
' Html2Ascii HtmlFile, AsciiFile


    HtmlIn$ = "element3.html"   ' Command$(1)
    AsciiOut$ = "Elem3.txt"     ' Command$(2)

    HtmlIn$ = Command$(1)
    AsciiOut$ = Command$(2)


    OPEN HtmlIn$ FOR BINARY AS #1
    OPEN AsciiOut$ FOR BINARY AS #2
    
    Html$ = String(LOF(1), 0)
    Get #1,, Html$
    CLOSE #1

    Html$ = Replace$(Html$,CHR$(13,10)," ")
    Html$ = Replace$(Html$,"<P ",CHR$(13,10) + "<P ")
    Html$ = Replace$(Html$,"<p ",CHR$(13,10) + "<p ")
    Html$ = Replace$(Html$,"<P>",CHR$(13,10))
    Html$ = Replace$(Html$,"<p>",CHR$(13,10))
    Html$ = Replace$(Html$,"<BR>",CHR$(13,10))
    Html$ = Replace$(Html$,"<br>",CHR$(13,10))

#IF 1
    Html$ = Replace$(Html$,"<LI>" ,SPACE$(9) + CHR$(7) + " ")
    Html$ = Replace$(Html$,"<li>"  ,SPACE$(9) + CHR$(7) + " ")
    Html$ = Replace$(Html$,"<DT>",SPACE$(9) + CHR$(7) + " ")
    Html$ = Replace$(Html$,"<dt>" ,SPACE$(9) + CHR$(7) + " ")
#ENDIF


    DO
        Entries% = Entries% + 1
        REDIM PRESERVE Entry$(Entries%)
        REDIM PRESERVE ASCII$(Entries%)

        READ Entry$(Entries%),ASCII$(Entries%)
        'MessageBox 0, Entry$(Entries%)+ "  " + ASCII$(Entries%), "Reading Data",0
    LOOP UNTIL Entry$(Entries%) = ""

    FOR I% = 1 TO Entries% -1
        IF ASCII$(I%) = "" THEN            
            Html$ = Replace$( Html$,Entry$(I%),"")
        ELSE
            Char% = VAL(ASCII$(I%))
            Html$ = Replace$(Html$,Entry$(I%),CHR$(Char%))
        END IF
    NEXT I%

    EatTags Html$,Stripped$
    
    Put #2,, Stripped$
    CLOSE
    END

' For symbol entries see
' https://stackoverflow.com/questions/8240030/how-to-convert-symbol-font-to-standard-
' utf8-html-entity
' http://www.fileformat.info/info/unicode/font/symbol/nonunicode.htm
' https://www.stat.auckland.ac.nz/~paul/R/CM/AdobeSym.html

DATA "&#61549;", "230"
DATA "&#61552;", "227"
DATA "&#61616;", "248"
DATA "&#61617;", "241"
DATA "&#61472;", ""
DATA "&ndash;", "45"
DATA "&rsquo;", "39"
DATA "&ldquo;", "34"
DATA "&rdquo;", "34"
DATA "&", "38"
DATA "&plusmn; ", "241"
DATA "&mu;", "77"
DATA """, "34"
DATA "&micro;", "230"
DATA "&radic;", "251"
DATA "&frac14;", "172"
DATA "<","60"
DATA ">","62"
DATA "&nbsp;",""
DATA "&mdash;","45"
DATA "&shy;",""
DATA "",""


SUB EatTags (Html$,NewAscii$)

InTag% = 0
Ln& = LEN(Html$)
StringChar$ = ""

CharPos& = INSTR(Html$,"<BODY") -1

DO
    CharPos& = CharPos& + 1
    StringChar$ = MID$(Html$,CharPos&,1)

    IF StringChar$ = "<" THEN
        InTag% = 1
        CONTINUE DO
    END IF

    IF StringChar$ = ">" THEN
        InTag% = 0
        CONTINUE DO
    END IF

    IF InTag% = 0 THEN
        NewAscii$ = NewAscii$ + StringChar$
    END IF
LOOP UNTIL CharPos& >= Ln&
EXIT SUB
END SUB


'-------------------------------------------------------------------------------------------------------------------
'
'   Replace$ Orginally named StrReplace by Eternal_Pain
'
'   Url page is at:
'   https://www.freebasic-portal.de/code-beispiele/string-funktionen/strreplace-59.html
'
'
'------------------------------------------------------------------------------------------------------------------
Function Replace$ (Byval StrEx as String, _
                     Byval StrMask as String, _
                     Byval StrRplce as String)

    If Len(StrEx)=0 or Len(StrMask)>Len(StrEx) Then Return StrEx

    Dim Buffer as String=StrEx
    Dim MaskSearch as UInteger
    Dim MFound as byte
    Dim lp as UInteger=1

    Do
        MaskSearch=InStr(lp,Buffer,StrMask)
        MFound=0

        If MaskSearch Then
            MFound=1:lp=MaskSearch+Len(StrRplce)

            ''
            Buffer=Left(Buffer,MaskSearch-1)+ _
            StrRplce+ _
            Right(Buffer,Len(Buffer)-(MaskSearch+(Len(StrMask)-1)))
            ''

        End If

    Loop while MFound=1

    Return Buffer
End Function
'-----------------------------------------------------------------------------'

Post Reply