Skip to content

Instantly share code, notes, and snippets.

@xavivars
Created October 14, 2014 21:39
Show Gist options
  • Select an option

  • Save xavivars/38ecea31809d72081a81 to your computer and use it in GitHub Desktop.

Select an option

Save xavivars/38ecea31809d72081a81 to your computer and use it in GitHub Desktop.

Revisions

  1. xavivars created this gist Oct 14, 2014.
    44 changes: 44 additions & 0 deletions performance.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,44 @@
    #!/usr/bin/env python3
    # vim: set ts=4 sw=4 sts=4 et :

    import sqlite3, re
    from datetime import datetime

    # Module-wide SQLite connection. It starts out as None and is assigned by
    # noteUnknownToken() the first time that function needs the database.
    missingFreqsDBConn = None

    # Matches an asterisk followed by one or more characters that are not
    # '.', ',', ';', ':', tab, '*' or space; group 1 captures those characters
    # (the token without its leading asterisk).
    unknownMarkRE = re.compile(r'\*([^.,;:\t\* ]+)')

    def stripUnknownMarks(text):
        """Return ``text`` with the leading ``*`` removed from each marked token.

        Every match of the module-level ``unknownMarkRE`` is replaced by its
        captured group, i.e. the token without the asterisk. A timestamp is
        printed before and after the substitution so the cost of the regex
        pass can be read off the output.

        :param text: string that may contain ``*token`` marks.
        :returns: the string with those marks stripped.
        """
        # Single-argument print(...) parses under both Python 2 and Python 3
        # and emits the same text as the old print statements, which are a
        # SyntaxError under the python3 interpreter named in the shebang.
        print("[ str:  {0}".format(datetime.now()))
        stripped = unknownMarkRE.sub(r'\1', text)
        print("] str:  {0}".format(datetime.now()))
        return stripped

    def noteUnknownTokens(text, pair, dbPath='this.db'):
        """Record every ``*``-marked token found in ``text`` for ``pair``.

        Each token captured by the module-level ``unknownMarkRE`` is handed to
        ``noteUnknownToken`` one at a time. Timestamps are printed on entry,
        before each token is stored, and on exit, so the time spent per token
        is visible in the output.

        :param text: string that may contain ``*token`` marks.
        :param pair: identifier stored alongside each token (the script passes
            ``'spa-cat'``).
        :param dbPath: path of the SQLite database forwarded to
            ``noteUnknownToken``; defaults to ``'this.db'``, the value that
            was previously hard-coded here.
        """
        # Single-argument print(...) parses under both Python 2 and Python 3
        # and emits the same text as the old print statements, which are a
        # SyntaxError under the python3 interpreter named in the shebang.
        print("[ re:  {0}".format(datetime.now()))
        print("{0} {1}".format(pair, text))
        for token in unknownMarkRE.findall(text):
            print("->re:  {0}".format(datetime.now()))
            noteUnknownToken(token, pair, dbPath)
        print("] re:  {0}".format(datetime.now()))


    def noteUnknownToken(token, pair, dbPath):
        """Add one to the stored frequency of ``token`` for ``pair``.

        The SQLite connection is opened on first use and kept in the
        module-level ``missingFreqsDBConn``; once that is set, ``dbPath`` is
        not consulted again. The ``missingFreqs`` table is created if it does
        not exist, then the row for ``(pair, token)`` is written with its
        previous frequency plus one (or 1 when there was no row). Timestamps
        are printed on entry and after the transaction has been committed.

        :param token: the unknown token to count.
        :param pair: identifier the token is counted under.
        :param dbPath: path passed to ``sqlite3.connect`` when no connection
            has been opened yet.
        """
        global missingFreqsDBConn

        # Single-argument print(...) parses under both Python 2 and Python 3
        # and emits the same text as the old print statements, which are a
        # SyntaxError under the python3 interpreter named in the shebang.
        print("[ sql:  {0}".format(datetime.now()))

        if not missingFreqsDBConn:
            missingFreqsDBConn = sqlite3.connect(dbPath)

        # Using the connection as a context manager commits on success and
        # rolls back if a statement raises, so a failed insert cannot leave a
        # transaction open on the shared connection.
        with missingFreqsDBConn:
            missingFreqsDBConn.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))')
            missingFreqsDBConn.execute('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + 1)', {'pair': pair, 'token': token})

        print("] sql:  {0}".format(datetime.now()))


    # Sample input for the timing run: four '*'-marked tokens, three of them
    # identical.
    longText = '*sampleWord *sampleWord *sampleWord *sampleWsord'

    if __name__ == '__main__':
        noteUnknownTokens(longText, 'spa-cat')
        # print(...) with one argument parses under both Python 2 and
        # Python 3; the old print statement is a SyntaxError under the
        # python3 interpreter named in the shebang.
        print(stripUnknownMarks(longText))