There are 1,002,711 words and 18,193 unique words in the Hong Kong Corpus of Spoken English.
RANK |
WORD |
INSTANCES |
PERCENTAGE |
1. |
the |
45156 |
4.7424 % |
2. |
a |
35834 |
3.7634 % |
3. |
to |
24431 |
2.5658 % |
4. |
and |
23753 |
2.4946 % |
5. |
you |
21425 |
2.2501 % |
6. |
I |
18923 |
1.9874 % |
7. |
of |
17474 |
1.8352 % |
8. |
in |
15047 |
1.5803 % |
9. |
is |
14000 |
1.4703 % |
10. |
that |
13118 |
1.3777 % |
11. |
it |
10244 |
1.0759 % |
12. |
we |
9850 |
1.0345 % |
13. |
have |
8753 |
0.9193 % |
14. |
yeah |
8651 |
0.9086 % |
15. |
so |
8436 |
0.8860 % |
16. |
for |
7746 |
0.8135 % |
17. |
this |
7065 |
0.7420 % |
18. |
they |
6384 |
0.6705 % |
19. |
but |
6204 |
0.6516 % |
20. |
know |
5750 |
0.6039 % |
21. |
are |
5733 |
0.6021 % |
22. |
be |
5386 |
0.5657 % |
23. |
it's |
5299 |
0.5565 % |
24. |
okay |
5032 |
0.5285 % |
25. |
not |
4876 |
0.5121 % |
26. |
one |
4667 |
0.4901 % |
27. |
on |
4416 |
0.4638 % |
28. |
do |
4368 |
0.4587 % |
29. |
what |
4363 |
0.4582 % |
30. |
can |
4260 |
0.4474 % |
31. |
will |
4169 |
0.4378 % |
32. |
think |
4067 |
0.4271 % |
33. |
or |
3967 |
0.4166 % |
34. |
with |
3841 |
0.4034 % |
35. |
as |
3816 |
0.4008 % |
36. |
if |
3742 |
0.3930 % |
37. |
very |
3734 |
0.3922 % |
38. |
Hong |
3704 |
0.3890 % |
39. |
Kong |
3497 |
0.3673 % |
40. |
because |
3458 |
0.3632 % |
41. |
like |
3337 |
0.3505 % |
42. |
at |
3266 |
0.3430 % |
43. |
then |
3221 |
0.3383 % |
44. |
right |
3205 |
0.3366 % |
45. |
about |
3184 |
0.3344 % |
46. |
no |
3013 |
0.3164 % |
47. |
just |
2985 |
0.3135 % |
48. |
there |
2956 |
0.3104 % |
49. |
from |
2911 |
0.3057 % |
50. |
don't |
2820 |
0.2962 % |
51. |
yes |
2812 |
0.2953 % |
52. |
now |
2806 |
0.2947 % |
53. |
well |
2707 |
0.2843 % |
54. |
all |
2704 |
0.2840 % |
55. |
your |
2656 |
0.2789 % |
56. |
our |
2641 |
0.2774 % |
57. |
oh |
2584 |
0.2714 % |
58. |
was |
2562 |
0.2691 % |
59. |
two |
2549 |
0.2677 % |
60. |
that's |
2508 |
0.2634 % |
61. |
some |
2469 |
0.2593 % |
62. |
would |
2300 |
0.2416 % |
63. |
my |
2297 |
0.2412 % |
64. |
when |
2233 |
0.2345 % |
65. |
how |
2152 |
0.2260 % |
66. |
more |
2152 |
0.2260 % |
67. |
people |
2128 |
0.2235 % |
68. |
he |
2100 |
0.2205 % |
69. |
time |
2074 |
0.2178 % |
70. |
also |
2002 |
0.2103 % |
71. |
by |
1965 |
0.2064 % |
72. |
say |
1911 |
0.2007 % |
73. |
see |
1904 |
0.2000 % |
74. |
here |
1770 |
0.1859 % |
75. |
good |
1766 |
0.1855 % |
76. |
go |
1752 |
0.1840 % |
77. |
an |
1742 |
0.1830 % |
78. |
me |
1721 |
0.1807 % |
79. |
mean |
1695 |
0.1780 % |
80. |
which |
1661 |
0.1744 % |
81. |
has |
1641 |
0.1723 % |
82. |
I'm |
1635 |
0.1717 % |
83. |
up |
1536 |
0.1613 % |
84. |
want |
1526 |
0.1603 % |
85. |
them |
1518 |
0.1594 % |
86. |
going |
1449 |
0.1522 % |
87. |
actually |
1425 |
0.1497 % |
88. |
get |
1421 |
0.1492 % |
89. |
three |
1414 |
0.1485 % |
90. |
these |
1378 |
0.1447 % |
91. |
their |
1377 |
0.1446 % |
92. |
really |
1371 |
0.1440 % |
93. |
other |
1365 |
0.1434 % |
94. |
got |
1357 |
0.1425 % |
95. |
out |
1343 |
0.1410 % |
96. |
much |
1261 |
0.1324 % |
97. |
first |
1243 |
0.1305 % |
98. |
any |
1183 |
0.1242 % |
99. |
us |
1174 |
0.1233 % |
100. |
should |
1160 |
0.1218 % |
101. |
work |
1136 |
0.1193 % |
102. |
need |
1135 |
0.1192 % |
103. |
something |
1088 |
0.1143 % |
104. |
use |
1081 |
0.1135 % |
105. |
she |
1077 |
0.1131 % |
106. |
only |
1073 |
0.1127 % |
107. |
year |
1060 |
0.1113 % |
108. |
why |
1056 |
0.1109 % |
109. |
been |
1049 |
0.1102 % |
110. |
look |
1018 |
0.1069 % |
111. |
English |
1015 |
0.1066 % |
112. |
may |
1003 |
0.1053 % |
113. |
come |
1002 |
0.1052 % |
114. |
you're |
993 |
0.1043 % |
115. |
make |
988 |
0.1038 % |
116. |
quite |
973 |
0.1022 % |
117. |
were |
954 |
0.1002 % |
118. |
five |
953 |
0.1001 % |
119. |
many |
948 |
0.0996 % |
120. |
lot |
928 |
0.0975 % |
121. |
who |
927 |
0.0974 % |
122. |
new |
906 |
0.0952 % |
123. |
give |
904 |
0.0949 % |
124. |
did |
897 |
0.0942 % |
125. |
point |
888 |
0.0933 % |
126. |
than |
887 |
0.0932 % |
127. |
back |
881 |
0.0925 % |
128. |
maybe |
873 |
0.0917 % |
129. |
Chinese |
869 |
0.0913 % |
130. |
hundred |
863 |
0.0906 % |
131. |
different |
861 |
0.0904 % |
132. |
take |
861 |
0.0904 % |
133. |
way |
851 |
0.0894 % |
134. |
after |
846 |
0.0888 % |
135. |
said |
844 |
0.0886 % |
136. |
those |
840 |
0.0882 % |
137. |
China |
827 |
0.0869 % |
138. |
course |
826 |
0.0867 % |
139. |
before |
825 |
0.0866 % |
140. |
Government |
820 |
0.0861 % |
141. |
years |
805 |
0.0845 % |
142. |
percent |
797 |
0.0837 % |
143. |
alright |
788 |
0.0828 % |
144. |
things |
777 |
0.0816 % |
145. |
last |
776 |
0.0815 % |
146. |
talk |
774 |
0.0813 % |
147. |
into |
770 |
0.0809 % |
148. |
still |
766 |
0.0804 % |
149. |
thank |
765 |
0.0803 % |
150. |
thing |
761 |
0.0799 % |
151. |
kind |
758 |
0.0796 % |
152. |
four |
756 |
0.0794 % |
153. |
business |
752 |
0.0790 % |
154. |
same |
748 |
0.0786 % |
155. |
had |
738 |
0.0775 % |
156. |
too |
729 |
0.0766 % |
157. |
even |
727 |
0.0764 % |
158. |
where |
721 |
0.0757 % |
159. |
we're |
714 |
0.0750 % |
160. |
most |
713 |
0.0749 % |
161. |
could |
711 |
0.0747 % |
162. |
there's |
711 |
0.0747 % |
163. |
long |
678 |
0.0712 % |
164. |
example |
669 |
0.0703 % |
165. |
doing |
654 |
0.0687 % |
166. |
over |
653 |
0.0686 % |
167. |
I've |
645 |
0.0677 % |
168. |
put |
645 |
0.0677 % |
169. |
system |
627 |
0.0658 % |
170. |
number |
623 |
0.0654 % |
171. |
find |
614 |
0.0645 % |
172. |
they're |
610 |
0.0641 % |
173. |
important |
608 |
0.0639 % |
174. |
part |
604 |
0.0634 % |
175. |
sure |
603 |
0.0633 % |
176. |
next |
602 |
0.0632 % |
177. |
six |
599 |
0.0629 % |
178. |
bit |
590 |
0.0620 % |
179. |
language |
586 |
0.0615 % |
180. |
better |
569 |
0.0598 % |
181. |
twenty |
565 |
0.0593 % |
182. |
ask |
564 |
0.0592 % |
183. |
sort |
562 |
0.0590 % |
184. |
down |
557 |
0.0585 % |
185. |
another |
546 |
0.0573 % |
186. |
I'll |
544 |
0.0571 % |
187. |
try |
541 |
0.0568 % |
188. |
thousand |
535 |
0.0562 % |
189. |
whether |
528 |
0.0555 % |
190. |
money |
527 |
0.0553 % |
191. |
students |
521 |
0.0547 % |
192. |
problem |
519 |
0.0545 % |
193. |
little |
518 |
0.0544 % |
194. |
day |
514 |
0.0540 % |
195. |
second |
512 |
0.0538 % |
196. |
company |
511 |
0.0537 % |
197. |
order |
509 |
0.0535 % |
198. |
world |
503 |
0.0528 % |
199. |
can't |
500 |
0.0525 % |
200. |
question |
498 |
0.0523 % |