RobPruzan committed on
Commit
9a395b0
·
1 Parent(s): adbc103

removing stopwords from vocabulary scores

Browse files
Files changed (1) hide show
  1. app.py +8 -0
app.py CHANGED
@@ -428,6 +428,7 @@ def get_level(word):
428
  word = word.strip(" ")
429
  data = json.loads(f.read())
430
  level = 0
 
431
  for k, v in data.items():
432
  if word in v:
433
  level = k
@@ -437,10 +438,17 @@ def get_level(word):
437
 
438
  def vocab_level_inter(text):
439
  text = word_tokenize(text)
 
 
 
440
  interp = [('',0)]
441
  sum = 0
442
  total = 0
443
  for idx, i in enumerate(text):
 
 
 
 
444
  lvl = int(get_level(i))/4
445
  interp.append((i, lvl))
446
  sum+= lvl
 
428
  word = word.strip(" ")
429
  data = json.loads(f.read())
430
  level = 0
431
+
432
  for k, v in data.items():
433
  if word in v:
434
  level = k
 
438
 
439
  def vocab_level_inter(text):
440
  text = word_tokenize(text)
441
+ stop_words = set(stopwords.words('english'))
442
+ for i in string.punctuation:
443
+ stop_words.add(i)
444
  interp = [('',0)]
445
  sum = 0
446
  total = 0
447
  for idx, i in enumerate(text):
448
+ if i in stop_words:
449
+ lvl = -1
450
+ interp.append((i, lvl))
451
+ continue
452
  lvl = int(get_level(i))/4
453
  interp.append((i, lvl))
454
  sum+= lvl