Tags: Automatic Speech Recognition · Transformers · Safetensors · Japanese · whisper · audio · hf-asr-leaderboard
asahi417 committed (verified) · Commit 7c7272d · 1 Parent(s): 0c42027

Update pipeline/test_pipeline.py

Files changed (1)
  1. pipeline/test_pipeline.py +63 -1
pipeline/test_pipeline.py CHANGED
@@ -3,6 +3,8 @@ from datasets import load_dataset
 from transformers.pipelines import pipeline
 
 model_alias = "kotoba-tech/kotoba-whisper-v1.1"
+
+print("""### P + S ###""")
 pipe = pipeline(model=model_alias,
                 punctuator=True,
                 stable_ts=True,
@@ -19,4 +21,64 @@ for i in dataset:
         generate_kwargs={"language": "japanese", "task": "transcribe"}
     )
     pprint(prediction)
-    input()
+    input()
+    break
+
+print("""### P ###""")
+pipe = pipeline(model=model_alias,
+                punctuator=True,
+                stable_ts=False,
+                chunk_length_s=15,
+                batch_size=16,
+                trust_remote_code=True)
+dataset = load_dataset("kotoba-tech/kotoba-whisper-eval", split="train")
+for i in dataset:
+    if i["audio"]["path"] == "long_interview_1.mp3":
+        i["audio"]["array"] = i["audio"]["array"][:7938000]
+    prediction = pipe(
+        i["audio"],
+        return_timestamps=True,
+        generate_kwargs={"language": "japanese", "task": "transcribe"}
+    )
+    pprint(prediction)
+    break
+
+print("""### S ###""")
+pipe = pipeline(model=model_alias,
+                punctuator=False,
+                stable_ts=True,
+                chunk_length_s=15,
+                batch_size=16,
+                trust_remote_code=True)
+dataset = load_dataset("kotoba-tech/kotoba-whisper-eval", split="train")
+for i in dataset:
+    if i["audio"]["path"] == "long_interview_1.mp3":
+        i["audio"]["array"] = i["audio"]["array"][:7938000]
+    prediction = pipe(
+        i["audio"],
+        return_timestamps=True,
+        generate_kwargs={"language": "japanese", "task": "transcribe"}
+    )
+    pprint(prediction)
+    break
+
+print("""### RAW ###""")
+pipe = pipeline(model=model_alias,
+                punctuator=False,
+                stable_ts=False,
+                chunk_length_s=15,
+                batch_size=16,
+                trust_remote_code=True)
+dataset = load_dataset("kotoba-tech/kotoba-whisper-eval", split="train")
+for i in dataset:
+    if i["audio"]["path"] == "long_interview_1.mp3":
+        i["audio"]["array"] = i["audio"]["array"][:7938000]
+    prediction = pipe(
+        i["audio"],
+        return_timestamps=True,
+        generate_kwargs={"language": "japanese", "task": "transcribe"}
+    )
+    pprint(prediction)
+    input()
+    break
+
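Note: the four added blocks differ only in the punctuator / stable_ts flags. A minimal sketch of the same test, assuming the exact model alias, evaluation dataset, and pipeline keyword arguments shown in the diff above (punctuator and stable_ts are options of this repo's custom pipeline), that loops over the four combinations instead of repeating the block:

from pprint import pprint
from datasets import load_dataset
from transformers.pipelines import pipeline

model_alias = "kotoba-tech/kotoba-whisper-v1.1"
dataset = load_dataset("kotoba-tech/kotoba-whisper-eval", split="train")

# The four configurations exercised by the test script: P + S, P, S, RAW.
configs = {
    "P + S": {"punctuator": True, "stable_ts": True},
    "P": {"punctuator": True, "stable_ts": False},
    "S": {"punctuator": False, "stable_ts": True},
    "RAW": {"punctuator": False, "stable_ts": False},
}

for name, flags in configs.items():
    print(f"### {name} ###")
    pipe = pipeline(model=model_alias,
                    chunk_length_s=15,
                    batch_size=16,
                    trust_remote_code=True,
                    **flags)
    for i in dataset:
        # Truncate the long interview sample, as in the test script.
        if i["audio"]["path"] == "long_interview_1.mp3":
            i["audio"]["array"] = i["audio"]["array"][:7938000]
        prediction = pipe(
            i["audio"],
            return_timestamps=True,
            generate_kwargs={"language": "japanese", "task": "transcribe"},
        )
        pprint(prediction)
        break  # only the first sample per configuration, as in the diff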