# Analyze the frequency of English vowel and consonant phonemes in words using Python.
import sqlite3
import matplotlib.pyplot as plt
# Vowel phoneme symbols whose frequency is measured.
vowels = [
    "iː", "i", "ɪ", "e", "æ", "ɑː", "ɒ", "ɔː", "ʊ", "u", "uː",
    "ʌ", "ɜː", "ə", "eɪ", "əʊ", "aɪ", "aʊ", "ɔɪ", "ɪə", "eə", "ʊə",
]
# Consonant phoneme symbols whose frequency is measured.
consonants = [
    "p", "b", "t", "d", "k", "ɡ", "tʃ", "dʒ", "f", "v", "θ", "ð",
    "s", "z", "ʃ", "ʒ", "h", "m", "n", "ŋ", "l", "r", "j", "w",
]

# Word database; the queries below read the ``englishwords`` table and its
# ``pronunciation`` column.
conn = sqlite3.connect(r"d:\englishwords.db")
cursor = conn.cursor()

# Total number of rows, used as the denominator for every percentage.
# COUNT(*) lets SQLite do the counting instead of fetching the whole table
# into memory just to take its length.
cursor.execute("SELECT COUNT(*) FROM englishwords")
totalwords = cursor.fetchone()[0]

# Sanity check: print how many non-empty pronunciations contain "iː".
cursor.execute(
    "SELECT COUNT(*) FROM englishwords"
    " WHERE pronunciation!='' AND pronunciation LIKE '%iː%'"
)
print(cursor.fetchone()[0])
def countPhoneme(phoneme: str) -> int:
    """Return how many rows have a pronunciation containing *phoneme*.

    Rows whose ``pronunciation`` is empty or NULL are never counted.

    The match is a plain substring test (SQL ``LIKE '%…%'``), so a symbol
    that is part of a longer one is counted as well: searching for "t" also
    matches pronunciations that only contain "tʃ".

    The query runs on the module-level ``cursor``.
    """
    # Bind the phoneme as a parameter instead of formatting it into the SQL
    # text, so quotes in the value cannot break or alter the statement, and
    # let SQLite count the matches rather than fetching every matching row.
    sql = (
        "SELECT COUNT(*) FROM englishwords"
        " WHERE pronunciation != '' AND pronunciation LIKE '%' || ? || '%'"
    )
    cursor.execute(sql, (phoneme,))
    return cursor.fetchone()[0]
def _phoneme_percentages(phonemes):
    """Return ``(phoneme, percentage)`` pairs sorted by descending percentage.

    For each phoneme the raw count from ``countPhoneme`` is printed, then
    converted to a percentage of ``totalwords``.
    """
    percentages = []
    for phoneme in phonemes:
        count = countPhoneme(phoneme)
        print(phoneme, " -> ", count)
        # An empty table would otherwise raise ZeroDivisionError here.
        percent = count / totalwords * 100 if totalwords else 0.0
        percentages.append((phoneme, percent))
    # sorted() is stable, so phonemes with equal percentages keep list order.
    return sorted(percentages, key=lambda pair: pair[1], reverse=True)


def _draw_frequency_bars(ax, data, title, color):
    """Draw *data*, a list of ``(phoneme, percentage)`` pairs, as bars on *ax*."""
    ax.set_ylabel("percentage %")
    ax.set_title(title)
    ax.bar(
        [phoneme for phoneme, _ in data],
        [percent for _, percent in data],
        color=color,
    )


# Two stacked charts: vowels on top, consonants below.
figure, (ax0, ax1) = plt.subplots(nrows=2, ncols=1)
figure.set_figheight(10)
figure.set_figwidth(8)
plt.subplots_adjust(wspace=0.5, hspace=0.3)

vowelsdata = _phoneme_percentages(vowels)
_draw_frequency_bars(
    ax0, vowelsdata, "Vowel frequency in English words", "#ff6666"
)

consonantsdata = _phoneme_percentages(consonants)
# NOTE(review): the leading newlines presumably push the title away from the
# vowel chart above — confirm before changing.
_draw_frequency_bars(
    ax1, consonantsdata, "\n\nConsonant frequency in English words", "#668866"
)

plt.show()