nnet3bin/nnet3-xvector-compute.cc-程序员宅基地

将特征在xvector神经网络模型中前向传播，并写出输出向量。我们将说话人识别的特定神经网络结构的输出向量或embedding称之为"Xvector"。该网络结构包括：帧级别的多个前馈层、在帧级别表示之上进行聚合的统计池化层，以及段级别的附加层。通常在统计池化层之后的输出层提取xvector。默认情况下，每个语句直接由其全部特征生成一个xvector。根据需要，也可以将输入特征切分为多个chunk，从每个chunk中分别提取xvector并求平均，以生成单个向量。

Usage: nnet3-xvector-compute [options] <raw-nnet-in> <features-rspecifier> <vector-wspecifier>

e.g.: nnet3-xvector-compute final.raw scp:feats.scp ark:nnet_prediction.ark

对一个语音特征chunk，生成一个xvector

/// Runs one forward pass of the network over a single chunk of features and
/// returns the embedding ("xvector") produced for that chunk.
///
/// @param features  Feature matrix for the chunk; its row count is used as
///                  the number of "input" indexes in the request.
/// @param nnet      Network to evaluate. It is passed as const and no network
///                  to update is supplied, so no training update is requested.
/// @param compiler  Compiler that turns the computation request into an
///                  executable computation.
/// @param xvector   Output vector. It is resized to the number of columns of
///                  the network output and filled with row 0 of that output.
static void RunNnetComputation(const MatrixBase<BaseFloat> &features,
    const Nnet &nnet, CachingOptimizingCompiler *compiler,
    Vector<BaseFloat> *xvector) {
  ComputationRequest request;
  request.need_model_derivative = false;
  request.store_component_stats = false;
  request.inputs.push_back(
      IoSpecification("input", 0, features.NumRows()));

  IoSpecification output_spec;
  output_spec.name = "output";
  output_spec.has_deriv = false;
  // Request exactly one index from the output node, so that one chunk
  // (segment) yields exactly one result: the xvector.
  output_spec.indexes.resize(1);
  request.outputs.resize(1);
  request.outputs[0].Swap(&output_spec);

  // Compile() returns a temporary, so it is used to initialise the pointer
  // directly; wrapping it in std::move would add nothing.
  std::shared_ptr<const NnetComputation> computation(
      compiler->Compile(request));
  Nnet *nnet_to_update = nullptr;  // we're not doing any update.
  NnetComputer computer(NnetComputeOptions(), *computation,
                        nnet, nnet_to_update);

  CuMatrix<BaseFloat> input_feats_cu(features);
  computer.AcceptInput("input", &input_feats_cu);
  computer.Run();

  CuMatrix<BaseFloat> cu_output;
  // Only one output index was requested above, so only row 0 is read below.
  computer.GetOutputDestructive("output", &cu_output);
  xvector->Resize(cu_output.NumCols());
  // The first row of the output matrix is the xvector.
  xvector->CopyFromVec(cu_output.Row(0));
}

// Command-line parsing, model loading and reader/writer setup.
// NOTE(review): `usage`, `argc` and `argv` come from the enclosing function,
// which is not part of this excerpt.
ParseOptions po(usage);

Timer timer;

NnetSimpleComputationOptions opts;

CachingOptimizingCompilerOptions compiler_config;

opts.acoustic_scale = 1.0; // by default do no scaling in this recipe.

std::string use_gpu = "no";

// chunk_size == -1 means "not set": the per-utterance loop below then uses
// the whole utterance as a single chunk.
int32 chunk_size = -1,

min_chunk_size = 100;

// If a chunk has fewer frames than min_chunk_size, pad the input on the left
// and right (see the per-utterance loop below).

bool pad_input = true;

// Register the option groups and the tool-specific options with the parser.
opts.Register(&po);

compiler_config.Register(&po);

po.Register("use-gpu", &use_gpu,

"yes|no|optional|wait, only has effect if compiled with CUDA");

po.Register("chunk-size", &chunk_size,

"If set, extracts xectors from specified chunk-size, and averages. "

"If not set, extracts an xvector from all available features.");

po.Register("min-chunk-size", &min_chunk_size,

"Minimum chunk-size allowed when extracting xvectors.");

po.Register("pad-input", &pad_input, "If true, duplicate the first and "

"last frames of the input features as required to equal min-chunk-size.");

po.Read(argc, argv);

// Exactly three positional arguments are required (see the Usage line:
// raw nnet, features rspecifier, vector wspecifier).
if (po.NumArgs() != 3) {

po.PrintUsage();

exit(1);

}

// GPU selection is only compiled in when HAVE_CUDA is 1.
#if HAVE_CUDA==1

CuDevice::Instantiate().SelectGpuId(use_gpu);

#endif

std::string nnet_rxfilename = po.GetArg(1),

feature_rspecifier = po.GetArg(2),

vector_wspecifier = po.GetArg(3);

Nnet nnet;

ReadKaldiObject(nnet_rxfilename, &nnet);

// Switch batch-norm and dropout to test mode before extraction.
// NOTE(review): CollapseModel presumably simplifies the network for
// inference -- confirm against its documentation.
SetBatchnormTestMode(true, &nnet);

SetDropoutTestMode(true, &nnet);

CollapseModel(CollapseModelConfig(), &nnet);

CachingOptimizingCompiler compiler(nnet, opts.optimize_config, compiler_config);

BaseFloatVectorWriter vector_writer(vector_wspecifier);

// Counters updated by the per-utterance loop.
int32 num_success = 0, num_fail = 0;

int64 frame_count = 0;

// Dimension of the node named "output"; used to size the averaged xvector.
int32 xvector_dim = nnet.OutputDim("output");

SequentialBaseFloatMatrixReader feature_reader(feature_rspecifier);

// Process every utterance: split its features into chunks, extract one
// xvector per chunk, and write the frame-weighted average of those xvectors.
// Utterances that yield no usable chunk are counted in num_fail and skipped.
for (; !feature_reader.Done(); feature_reader.Next()) {
  std::string utt = feature_reader.Key();
  const Matrix<BaseFloat> &features (feature_reader.Value());
  if (features.NumRows() == 0) {
    KALDI_WARN << "Zero-length utterance: " << utt;
    num_fail++;
    continue;
  }
  int32 num_rows = features.NumRows(),
        feat_dim = features.NumCols(),
        this_chunk_size = chunk_size;

  if (!pad_input && num_rows < min_chunk_size) {
    KALDI_WARN << "Minimum chunk size of " << min_chunk_size
               << " is greater than the number of rows "
               << "in utterance: " << utt;
    num_fail++;
    continue;
  } else if (num_rows < chunk_size) {
    KALDI_LOG << "Chunk size of " << chunk_size << " is greater than "
              << "the number of rows in utterance: " << utt
              << ", using chunk size of " << num_rows;
    this_chunk_size = num_rows;
  } else if (chunk_size <= 0) {
    // -1 is the "not set" default; any other non-positive value is treated
    // the same way so that the chunk count below never divides by zero or
    // goes negative. The whole utterance becomes a single chunk.
    this_chunk_size = num_rows;
  }

  // Number of chunks = ceil(num_rows / this_chunk_size), in integer
  // arithmetic. this_chunk_size is positive here, and the count is 1 when
  // the whole utterance is used as a single chunk.
  int32 num_chunks = (num_rows + this_chunk_size - 1) / this_chunk_size;
  Vector<BaseFloat> xvector_avg(xvector_dim, kSetZero);
  BaseFloat tot_weight = 0.0;

  // Iterate over the feature chunks.
  for (int32 chunk_indx = 0; chunk_indx < num_chunks; chunk_indx++) {
    // Near the end of the input the remaining frames may not fill a whole
    // chunk, so `offset` (the number of frames in this chunk) is capped at
    // what is left.
    int32 offset = std::min(
        this_chunk_size, num_rows - chunk_indx * this_chunk_size);
    if (!pad_input && offset < min_chunk_size)
      continue;
    SubMatrix<BaseFloat> sub_features(
        features, chunk_indx * this_chunk_size, offset, 0, feat_dim);
    Vector<BaseFloat> xvector;
    tot_weight += offset;

    // Pad input if the offset is less than the minimum chunk size
    if (pad_input && offset < min_chunk_size) {
      Matrix<BaseFloat> padded_features(min_chunk_size, feat_dim);
      int32 left_context = (min_chunk_size - offset) / 2;
      int32 right_context = min_chunk_size - offset - left_context;
      // Replicate the first frame on the left and the last frame on the
      // right, then copy the real frames into the middle.
      for (int32 i = 0; i < left_context; i++) {
        padded_features.Row(i).CopyFromVec(sub_features.Row(0));
      }
      for (int32 i = 0; i < right_context; i++) {
        padded_features.Row(min_chunk_size - i - 1).CopyFromVec(sub_features.Row(offset - 1));
      }
      padded_features.Range(left_context, offset, 0, feat_dim).CopyFromMat(sub_features);
      // One chunk produces one xvector.
      RunNnetComputation(padded_features, nnet, &compiler, &xvector);
    } else {
      RunNnetComputation(sub_features, nnet, &compiler, &xvector);
    }
    // Accumulate the chunk xvectors, weighted by their (unpadded) frame count.
    xvector_avg.AddVec(offset, xvector);
  }

  // Every chunk can be skipped when padding is disabled and the chunk size is
  // below min_chunk_size. Dividing by a zero weight would then write a
  // NaN/inf vector, so report the utterance as failed instead.
  if (tot_weight <= 0.0) {
    KALDI_WARN << "No chunk of at least " << min_chunk_size
               << " frames could be extracted from utterance: " << utt;
    num_fail++;
    continue;
  }

  // Normalise the weighted sum into the average xvector over all chunks.
  xvector_avg.Scale(1.0 / tot_weight);
  vector_writer.Write(utt, xvector_avg);

  frame_count += num_rows;
  num_success++;
}

转载于:https://www.cnblogs.com/JarvanWang/p/10146015.html

本文链接：https://blog.csdn.net/weixin_30887919/article/details/99460702

原作者删帖不实内容删帖广告或垃圾文章投诉

智能推荐

2023年值得关注的几个跨境电商平台！_2023哪些跨境平台值得做-程序员宅基地

文章浏览阅读1.5k次。2022年即将过去了，2023年您打算做什么呢？不少小伙伴打算做跨境电商了，目前也正在学习当中，这里就告诉大家一下2023年值得关注的几个跨境电商平台吧！_2023哪些跨境平台值得做

深度学习 - 10.TF x Keras 基于 CNN 与 RNN 的文本序列 - 温度预测问题-程序员宅基地

文章浏览阅读1.8k次，点赞2次，收藏31次。一.引言上一篇文章基础文本处理 processing && embedding介绍了常用的文本处理方法，趁热打铁了解一下处理连续文本的 demo 流程。二.数据信息与获取下面例子将用到气象记录站的天气时间序列，数据集中每10分钟记录14个不同的指标，包含气压，温度，湿度，风向等等环境相关的特征，这里采用 2009-2016 年的数据作为备选。1.源数据获取通过本地 terminal 输入如下命令，即可在对应 Downloads 文件夹下获取到原始数据。 ..

python获取视频帧率，总帧数，python ffmpeg获取视频信息ffmpeg.prob，python opencv获取视频信息cap.get(cv2.CAP_PROP_FRAME_WIDTH)-程序员宅基地

文章浏览阅读2.3w次，点赞9次，收藏59次。文章目录1，效果2，ffmpeg获取视频信息2，opencv获取视频信息1，效果2，ffmpeg获取视频信息其中key：‘streams’对应的值是一个list，list中有两个dict类型的值，分别表示视频中视频流和音频流的相关信息。key：‘format’对应的值是一个dict，其中包含了视频的相关的格式信息、视频时长信息、文件大小信息等。import ffmpegde..._cap.get(cv2.cap_prop_frame_width)

Bitmap.createBitmap java.lang.IllegalArgumentException: width and height must be > 0 问题解决-程序员宅基地

文章浏览阅读3.6k次。java.lang.IllegalArgumentException: width and height must be > 0 在postraotate之前需要设置转换矩形区域旋转时要判断旋转角度是否大于0，否则不做旋转。缺一报错Matrix mt = new Matrix(); float delta = angle - lastAngle;_java.lang.illegalargumentexception: width and height must be > 0

linux缺页异常处理--用户空间_linux 缺页异常匿名-程序员宅基地

文章浏览阅读1.1w次，点赞2次，收藏22次。用户空间的缺页异常可以分为两种情况--1.触发异常的线性地址处于用户空间的vma中，但还未分配物理页，如果访问权限OK的话内核就给进程分配相应的物理页了2.触发异常的线性地址不处于用户空间的vma中，这种情况得判断是不是因为用户进程的栈空间消耗完而触发的缺页异常，如果是的话则在用户空间对栈区域进行扩展，并且分配相应的物理页，如果不是则作为一次非法地址访问来处理，内核将终结进程下面来看d_linux 缺页异常匿名

【MFC】多文档窗口实现现实多个不同的view窗口_mfc多文档生成多个子窗口-程序员宅基地

文章浏览阅读1.6k次。在InitInstance函数中添加多个文档模板m_pTemplateSchematicView = new CMultiDocTemplate(IDR_CTEMATYPE, RUNTIME_CLASS(CCTEMADoc), RUNTIME_CLASS(CChildFrame), // 自定义 MDI 子框架 RUNTIME_CLASS(CCTEMAView)); if (!m_pTemplateSchematicView) re..._mfc多文档生成多个子窗口

随便推点

miniui自定义弹窗（mini.showMessageBox）-程序员宅基地

文章浏览阅读3.7k次。1.miniui自定义弹窗（mini.showMessageBox）//自定义弹窗 mini.showMessageBox({ title: "请选择您要的饮品", buttons: ["白开水","咖啡","果汁","都不要"], html: "这里是tit_mini.showmessagebox

Java字符串转成输入流InputStream_java字符串转流-程序员宅基地

文章浏览阅读2k次，点赞2次，收藏6次。Java字符串转成输入流InputStream_java字符串转流

git reset 和 git revert_git revert和reset-程序员宅基地

文章浏览阅读1.4w次，点赞26次，收藏74次。一、问题描述在利用github实现多人合作程序开发的过程中，我们有时会出现错误提交的情况，此时我们希望能撤销提交操作，让程序回到提交前的样子，本文总结了两种解决方法：回退（reset）、反做（revert）。二、背景知识git的版本管理，及HEAD的理解使用git的每次提交，Git都会自动把它们串成一条时间线，这条时间线就是一个分支。如果没有新建分支，那么只有一条时间线，即只有一个分支，在Git里，这个分支叫主分支，即master分支。有一个HEAD指针指向当前分支（只有一个分支的情况下会指向ma_git revert和reset

ROS2+NAV2如何快捷的在docker中使用主机的CAN_ros2 nav2 docker arm 部署-程序员宅基地

文章浏览阅读384次。2.基于镜像创建新容器：注意为了保证和旧容器其它配置全部一样，创建时，需要保留旧的配置和旧容器创建是一样（包括挂载的目录、关键变量等），然后修改或者增加自己需要加的改动。其中old_container_id为老的容器ID，new_image为镜像名，v1为标签。如果容器已经创建，忘记指定--network=host了，又不想删除老容器，想仍然用这个容器里面的各种配置，可以考虑创建一份这个容器的镜像，基于这个镜像，指定--network=host，创建新的容器。_ros2 nav2 docker arm 部署

# Android 设置PNG图片的打印分辨率 dpi (pHYs)_phys dpi-程序员宅基地

文章浏览阅读3k次。Android 设置PNG图片的打印分辨率dpi(pHYs)1.了解png的原文件数据，头文件IHDR，控制物理密度的pHYs，关于png的头文件IHDR：https://blog.csdn.net/satanzw/article/details/38757121png图片都是以固定标识89 50 4E 47 0D 0A 1A 0A开始，然后接着IHDR例如一张png从头开始为：..._phys dpi

uniapp 解决app头部导航和手机顶部状态栏叠加问题及样式拼接写法_app顶部状态栏与头部重叠-程序员宅基地

文章浏览阅读3.8k次。app开发，手机顶部状态栏会和app头部导航叠加在一起解决方法：拿到顶部状态栏的高度，再给头部导航加个padding-top在app.vue里拿到状态栏的高度并存放在globalData里onLaunch() { const that = this; uni.getSystemInfo({ success(res) { that.globalData.statusBarHeight = res.statusBarHeight; } })},globalData:{ statu_app顶部状态栏与头部重叠