
There are 1,002,711 words and 18,193 unique words in the Hong Kong Corpus of Spoken English.
|
RANK |
WORD |
INSTANCES |
PERCENTAGE |
|
1. |
the |
45156 |
4.7424 % |
|
2. |
a |
35834 |
3.7634 % |
|
3. |
to |
24431 |
2.5658 % |
|
4. |
and |
23753 |
2.4946 % |
|
5. |
you |
21425 |
2.2501 % |
|
6. |
I |
18923 |
1.9874 % |
|
7. |
of |
17474 |
1.8352 % |
|
8. |
in |
15047 |
1.5803 % |
|
9. |
is |
14000 |
1.4703 % |
|
10. |
that |
13118 |
1.3777 % |
|
11. |
it |
10244 |
1.0759 % |
|
12. |
we |
9850 |
1.0345 % |
|
13. |
have |
8753 |
0.9193 % |
|
14. |
yeah |
8651 |
0.9086 % |
|
15. |
so |
8436 |
0.8860 % |
|
16. |
for |
7746 |
0.8135 % |
|
17. |
this |
7065 |
0.7420 % |
|
18. |
they |
6384 |
0.6705 % |
|
19. |
but |
6204 |
0.6516 % |
|
20. |
know |
5750 |
0.6039 % |
|
21. |
are |
5733 |
0.6021 % |
|
22. |
be |
5386 |
0.5657 % |
|
23. |
it's |
5299 |
0.5565 % |
|
24. |
okay |
5032 |
0.5285 % |
|
25. |
not |
4876 |
0.5121 % |
|
26. |
one |
4667 |
0.4901 % |
|
27. |
on |
4416 |
0.4638 % |
|
28. |
do |
4368 |
0.4587 % |
|
29. |
what |
4363 |
0.4582 % |
|
30. |
can |
4260 |
0.4474 % |
|
31. |
will |
4169 |
0.4378 % |
|
32. |
think |
4067 |
0.4271 % |
|
33. |
or |
3967 |
0.4166 % |
|
34. |
with |
3841 |
0.4034 % |
|
35. |
as |
3816 |
0.4008 % |
|
36. |
if |
3742 |
0.3930 % |
|
37. |
very |
3734 |
0.3922 % |
|
38. |
Hong |
3704 |
0.3890 % |
|
39. |
Kong |
3497 |
0.3673 % |
|
40. |
because |
3458 |
0.3632 % |
|
41. |
like |
3337 |
0.3505 % |
|
42. |
at |
3266 |
0.3430 % |
|
43. |
then |
3221 |
0.3383 % |
|
44. |
right |
3205 |
0.3366 % |
|
45. |
about |
3184 |
0.3344 % |
|
46. |
no |
3013 |
0.3164 % |
|
47. |
just |
2985 |
0.3135 % |
|
48. |
there |
2956 |
0.3104 % |
|
49. |
from |
2911 |
0.3057 % |
|
50. |
don't |
2820 |
0.2962 % |
|
51. |
yes |
2812 |
0.2953 % |
|
52. |
now |
2806 |
0.2947 % |
|
53. |
well |
2707 |
0.2843 % |
|
54. |
all |
2704 |
0.2840 % |
|
55. |
your |
2656 |
0.2789 % |
|
56. |
our |
2641 |
0.2774 % |
|
57. |
oh |
2584 |
0.2714 % |
|
58. |
was |
2562 |
0.2691 % |
|
59. |
two |
2549 |
0.2677 % |
|
60. |
that's |
2508 |
0.2634 % |
|
61. |
some |
2469 |
0.2593 % |
|
62. |
would |
2300 |
0.2416 % |
|
63. |
my |
2297 |
0.2412 % |
|
64. |
when |
2233 |
0.2345 % |
|
65. |
how |
2152 |
0.2260 % |
|
66. |
more |
2152 |
0.2260 % |
|
67. |
people |
2128 |
0.2235 % |
|
68. |
he |
2100 |
0.2205 % |
|
69. |
time |
2074 |
0.2178 % |
|
70. |
also |
2002 |
0.2103 % |
|
71. |
by |
1965 |
0.2064 % |
|
72. |
say |
1911 |
0.2007 % |
|
73. |
see |
1904 |
0.2000 % |
|
74. |
here |
1770 |
0.1859 % |
|
75. |
good |
1766 |
0.1855 % |
|
76. |
go |
1752 |
0.1840 % |
|
77. |
an |
1742 |
0.1830 % |
|
78. |
me |
1721 |
0.1807 % |
|
79. |
mean |
1695 |
0.1780 % |
|
80. |
which |
1661 |
0.1744 % |
|
81. |
has |
1641 |
0.1723 % |
|
82. |
I'm |
1635 |
0.1717 % |
|
83. |
up |
1536 |
0.1613 % |
|
84. |
want |
1526 |
0.1603 % |
|
85. |
them |
1518 |
0.1594 % |
|
86. |
going |
1449 |
0.1522 % |
|
87. |
actually |
1425 |
0.1497 % |
|
88. |
get |
1421 |
0.1492 % |
|
89. |
three |
1414 |
0.1485 % |
|
90. |
these |
1378 |
0.1447 % |
|
91. |
their |
1377 |
0.1446 % |
|
92. |
really |
1371 |
0.1440 % |
|
93. |
other |
1365 |
0.1434 % |
|
94. |
got |
1357 |
0.1425 % |
|
95. |
out |
1343 |
0.1410 % |
|
96. |
much |
1261 |
0.1324 % |
|
97. |
first |
1243 |
0.1305 % |
|
98. |
any |
1183 |
0.1242 % |
|
99. |
us |
1174 |
0.1233 % |
|
100. |
should |
1160 |
0.1218 % |
|
101. |
work |
1136 |
0.1193 % |
|
102. |
need |
1135 |
0.1192 % |
|
103. |
something |
1088 |
0.1143 % |
|
104. |
use |
1081 |
0.1135 % |
|
105. |
she |
1077 |
0.1131 % |
|
106. |
only |
1073 |
0.1127 % |
|
107. |
year |
1060 |
0.1113 % |
|
108. |
why |
1056 |
0.1109 % |
|
109. |
been |
1049 |
0.1102 % |
|
110. |
look |
1018 |
0.1069 % |
|
111. |
English |
1015 |
0.1066 % |
|
112. |
may |
1003 |
0.1053 % |
|
113. |
come |
1002 |
0.1052 % |
|
114. |
you're |
993 |
0.1043 % |
|
115. |
make |
988 |
0.1038 % |
|
116. |
quite |
973 |
0.1022 % |
|
117. |
were |
954 |
0.1002 % |
|
118. |
five |
953 |
0.1001 % |
|
119. |
many |
948 |
0.0996 % |
|
120. |
lot |
928 |
0.0975 % |
|
121. |
who |
927 |
0.0974 % |
|
122. |
new |
906 |
0.0952 % |
|
123. |
give |
904 |
0.0949 % |
|
124. |
did |
897 |
0.0942 % |
|
125. |
point |
888 |
0.0933 % |
|
126. |
than |
887 |
0.0932 % |
|
127. |
back |
881 |
0.0925 % |
|
128. |
maybe |
873 |
0.0917 % |
|
129. |
Chinese |
869 |
0.0913 % |
|
130. |
hundred |
863 |
0.0906 % |
|
131. |
different |
861 |
0.0904 % |
|
132. |
take |
861 |
0.0904 % |
|
133. |
way |
851 |
0.0894 % |
|
134. |
after |
846 |
0.0888 % |
|
135. |
said |
844 |
0.0886 % |
|
136. |
those |
840 |
0.0882 % |
|
137. |
China |
827 |
0.0869 % |
|
138. |
course |
826 |
0.0867 % |
|
139. |
before |
825 |
0.0866 % |
|
140. |
Government |
820 |
0.0861 % |
|
141. |
years |
805 |
0.0845 % |
|
142. |
percent |
797 |
0.0837 % |
|
143. |
alright |
788 |
0.0828 % |
|
144. |
things |
777 |
0.0816 % |
|
145. |
last |
776 |
0.0815 % |
|
146. |
talk |
774 |
0.0813 % |
|
147. |
into |
770 |
0.0809 % |
|
148. |
still |
766 |
0.0804 % |
|
149. |
thank |
765 |
0.0803 % |
|
150. |
thing |
761 |
0.0799 % |
|
151. |
kind |
758 |
0.0796 % |
|
152. |
four |
756 |
0.0794 % |
|
153. |
business |
752 |
0.0790 % |
|
154. |
same |
748 |
0.0786 % |
|
155. |
had |
738 |
0.0775 % |
|
156. |
too |
729 |
0.0766 % |
|
157. |
even |
727 |
0.0764 % |
|
158. |
where |
721 |
0.0757 % |
|
159. |
we're |
714 |
0.0750 % |
|
160. |
most |
713 |
0.0749 % |
|
161. |
could |
711 |
0.0747 % |
|
162. |
there's |
711 |
0.0747 % |
|
163. |
long |
678 |
0.0712 % |
|
164. |
example |
669 |
0.0703 % |
|
165. |
doing |
654 |
0.0687 % |
|
166. |
over |
653 |
0.0686 % |
|
167. |
I've |
645 |
0.0677 % |
|
168. |
put |
645 |
0.0677 % |
|
169. |
system |
627 |
0.0658 % |
|
170. |
number |
623 |
0.0654 % |
|
171. |
find |
614 |
0.0645 % |
|
172. |
they're |
610 |
0.0641 % |
|
173. |
important |
608 |
0.0639 % |
|
174. |
part |
604 |
0.0634 % |
|
175. |
sure |
603 |
0.0633 % |
|
176. |
next |
602 |
0.0632 % |
|
177. |
six |
599 |
0.0629 % |
|
178. |
bit |
590 |
0.0620 % |
|
179. |
language |
586 |
0.0615 % |
|
180. |
better |
569 |
0.0598 % |
|
181. |
twenty |
565 |
0.0593 % |
|
182. |
ask |
564 |
0.0592 % |
|
183. |
sort |
562 |
0.0590 % |
|
184. |
down |
557 |
0.0585 % |
|
185. |
another |
546 |
0.0573 % |
|
186. |
I'll |
544 |
0.0571 % |
|
187. |
try |
541 |
0.0568 % |
|
188. |
thousand |
535 |
0.0562 % |
|
189. |
whether |
528 |
0.0555 % |
|
190. |
money |
527 |
0.0553 % |
|
191. |
students |
521 |
0.0547 % |
|
192. |
problem |
519 |
0.0545 % |
|
193. |
little |
518 |
0.0544 % |
|
194. |
day |
514 |
0.0540 % |
|
195. |
second |
512 |
0.0538 % |
|
196. |
company |
511 |
0.0537 % |
|
197. |
order |
509 |
0.0535 % |
|
198. |
world |
503 |
0.0528 % |
|
199. |
can't |
500 |
0.0525 % |
|
200. |
question |
498 |
0.0523 % |