Solarium 如何创建新字段的配置,而不是插入数据自动创建

如,我插入了一条数据["title"=>"今天天气真好"],

$update=$this->client->createUpdate();
$doc=$update->createDocument();
$doc->title = "今天天气真好"
$update->addDocument($doc);
$update->addCommit();
$updateResponse=$this->client->update($update);

会自动在配置文件中加入:

<field name="title" type="text_general"/>

而这个自动生成的类型 text_general 不是我想要的,也不想在插数据时生成配置,有没有方法能设置字段类型,并生成一条字段的配置呢?

阅读 2.1k
1 个回答
新手上路,请多包涵
/**
 * 创建/修改core
 * 要先创建目录,复制conf目录
 * 然后复制自定义字段jar
 * @param $coreName
 * @param array $field_array
 * @param bool $new 是否是新增core
 * @return bool
 */
public function createCore($coreName, array $field_array = [], $new=true){
    if($new){
        // 生成配置文件
        mkdir($this->solr_home_path.$coreName);
        copydir($this->default_conf_path,$this->solr_home_path.$coreName."\\conf");
        mkdir($this->solr_home_path.$coreName."\\lib");
        // 生成自定义关键字分词文件(| 和 , 为关键字)
        copy($this->verticalLineFile,$this->solr_home_path.$coreName."\\lib\\MyVerticalLineTokenizerFactory.jar");
        copy($this->commaFile,$this->solr_home_path.$coreName."\\lib\\MyCommaTokenizerFactory.jar");
    }

    // 生成schema文件
    $this->createSchemaFile($field_array,$this->solr_home_path.$coreName."\\conf"."\\managed-schema");

    // 新增core或重新配置core
    $coreAdminQuery = $this->client->createCoreAdmin();
    if($new){
        $createAction = $coreAdminQuery->createCreate();
        $createAction->setCore($coreName);
        $createAction->setDataDir("data");
        $createAction->setSchema("conf/managed-schema");
    }else{
        $createAction = $coreAdminQuery->createReload();
        $createAction->setCore($coreName);
    }
    $coreAdminQuery->setAction($createAction);
    $this->client->coreAdmin($coreAdminQuery);
    return true;
}


 /**
     * 创建Schema文件
     * @param array $field_array
     * @param $path
     * @return bool
     */
    public function createSchemaFile(array $field_array, $path){
        $fields_str = "";
        foreach ($field_array as $val){
            $val["required"] =  $val["required"] ?? "false";
            $val["indexed"]  =  $val["indexed"] ?? "false";
            $val["stored"]   =  $val["stored"] ?? "false";
            $fields_str.='<field name="'.$val["name"].'" type="'.$val["type"].'" indexed="'.$val["indexed"].'" stored="'.$val["stored"].'" required="'.$val["required"].'"/>';
        }
        $file = <<<EOF
<?xml version="1.0" encoding="UTF-8" ?>
<schema name="default-config" version="1.6">
<field name="uuid" type="uuid" indexed="true" stored="true" required="true" multiValued="false" />
{$fields_str}
<field name="_version_" type="plong" indexed="false" stored="false"/>
<field name="_text_" type="text_general" indexed="true" stored="false" multiValued="true"/>

<dynamicField name="*_i"  type="pint"    indexed="true"  stored="true"/>
<dynamicField name="*_is" type="pints"    indexed="true"  stored="true"/>
<dynamicField name="*_s"  type="string"  indexed="true"  stored="true" />
<dynamicField name="*_ss" type="strings"  indexed="true"  stored="true"/>
<dynamicField name="*_l"  type="plong"   indexed="true"  stored="true"/>
<dynamicField name="*_ls" type="plongs"   indexed="true"  stored="true"/>
<dynamicField name="*_t" type="text_general" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_txt" type="text_general" indexed="true" stored="true"/>
<dynamicField name="*_b"  type="boolean" indexed="true" stored="true"/>
<dynamicField name="*_bs" type="booleans" indexed="true" stored="true"/>
<dynamicField name="*_f"  type="pfloat"  indexed="true"  stored="true"/>
<dynamicField name="*_fs" type="pfloats"  indexed="true"  stored="true"/>
<dynamicField name="*_d"  type="pdouble" indexed="true"  stored="true"/>
<dynamicField name="*_ds" type="pdoubles" indexed="true"  stored="true"/>
<dynamicField name="random_*" type="random"/>

<!-- Type used for data-driven schema, to add a string copy for each text field -->
<dynamicField name="*_str" type="strings" stored="false" docValues="true" indexed="false" useDocValuesAsStored="false"/>

<dynamicField name="*_dt"  type="pdate"    indexed="true"  stored="true"/>
<dynamicField name="*_dts" type="pdate"    indexed="true"  stored="true" multiValued="true"/>
<dynamicField name="*_p"  type="location" indexed="true" stored="true"/>
<dynamicField name="*_srpt"  type="location_rpt" indexed="true" stored="true"/>

<!-- payloaded dynamic fields -->
<dynamicField name="*_dpf" type="delimited_payloads_float" indexed="true"  stored="true"/>
<dynamicField name="*_dpi" type="delimited_payloads_int" indexed="true"  stored="true"/>
<dynamicField name="*_dps" type="delimited_payloads_string" indexed="true"  stored="true"/>

<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>

<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
<uniqueKey>uuid</uniqueKey>
<uniqueKey>id</uniqueKey>
<fieldType name="uuid" class="solr.UUIDField" indexed="true" />
<fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true" />
<fieldType name="strings" class="solr.StrField" sortMissingLast="true" multiValued="true" docValues="true" />

<!-- boolean type: "true" or "false" -->
<fieldType name="boolean" class="solr.BoolField" sortMissingLast="true"/>
<fieldType name="booleans" class="solr.BoolField" sortMissingLast="true" multiValued="true"/>

<!--
Numeric field types that index values using KD-trees.
Point fields don't support FieldCache, so they must have docValues="true" if needed for sorting, faceting, functions, etc.
-->
<fieldType name="pint" class="solr.IntPointField" docValues="true"/>
<fieldType name="pfloat" class="solr.FloatPointField" docValues="true"/>
<fieldType name="plong" class="solr.LongPointField" docValues="true"/>
<fieldType name="pdouble" class="solr.DoublePointField" docValues="true"/>

<fieldType name="pints" class="solr.IntPointField" docValues="true" multiValued="true"/>
<fieldType name="pfloats" class="solr.FloatPointField" docValues="true" multiValued="true"/>
<fieldType name="plongs" class="solr.LongPointField" docValues="true" multiValued="true"/>
<fieldType name="pdoubles" class="solr.DoublePointField" docValues="true" multiValued="true"/>
<fieldType name="random" class="solr.RandomSortField" indexed="true"/>

<fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
<fieldType name="pdates" class="solr.DatePointField" docValues="true" multiValued="true"/>

<!--Binary data type. The data should be sent/retrieved in as Base64 encoded Strings -->
<fieldType name="binary" class="solr.BinaryField"/>


<dynamicField name="*_ws" type="text_ws"  indexed="true"  stored="true"/>
<fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
</analyzer>
</fieldType>


<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/>
-->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>


<!-- SortableTextField generaly functions exactly like TextField,
except that it supports, and by default uses, docValues for sorting (or faceting)
on the first 1024 characters of the original field values (which is configurable).

This makes it a bit more useful then TextField in many situations, but the trade-off
is that it takes up more space on disk; which is why it's not used in place of TextField
for every fieldType in this _default schema.
-->
<dynamicField name="*_t_sort" type="text_gen_sort" indexed="true" stored="true" multiValued="false"/>
<dynamicField name="*_txt_sort" type="text_gen_sort" indexed="true" stored="true"/>
<fieldType name="text_gen_sort" class="solr.SortableTextField" positionIncrementGap="100" multiValued="true">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<!-- A text field with defaults appropriate for English: it tokenizes with StandardTokenizer,
removes English stop words (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
finally applies Porter's stemming.  The query time analyzer also applies synonyms from synonyms.txt. -->
<dynamicField name="*_txt_en" type="text_en"  indexed="true"  stored="true"/>
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.FlattenGraphFilterFactory"/>
-->
<!-- Case insensitive stop word removal.
-->
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/>
-->
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/>
-->
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>

<!-- A text field with defaults appropriate for English, plus
aggressive word-splitting and autophrase features enabled.
This field is just like text_en, except it adds
WordDelimiterGraphFilter to enable splitting and matching of
words on case-change, alpha numeric boundaries, and
non-alphanumeric chars.  This means certain compound word
cases will work, for example query "wi fi" will match
document "WiFi" or "wi-fi".
-->
<dynamicField name="*_txt_en_split" type="text_en_splitting"  indexed="true"  stored="true"/>
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<!-- in this example, we will only use synonyms at query time
<filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
-->
<!-- Case insensitive stop word removal.
-->
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory"
ignoreCase="true"
words="lang/stopwords_en.txt"
/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>

<!-- Less flexible matching, but less false matches.  Probably not ideal for product names,
but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
<dynamicField name="*_txt_en_split_tight" type="text_en_splitting_tight"  indexed="true"  stored="true"/>
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterGraphFilter in conjuncton with stemming. -->
<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
<filter class="solr.FlattenGraphFilterFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>

<filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
</analyzer>
</fieldType>

<dynamicField name="*_txt_rev" type="text_general_rev"  indexed="true"  stored="true"/>
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<dynamicField name="*_phon_en" type="phonetic_en"  indexed="true"  stored="true"/>
<fieldType name="phonetic_en" stored="false" indexed="true" class="solr.TextField" >
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.DoubleMetaphoneFilterFactory" inject="false"/>
</analyzer>
</fieldType>

<dynamicField name="*_s_lower" type="lowercase"  indexed="true"  stored="true"/>
<fieldType name="lowercase" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.KeywordTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>

<dynamicField name="*_descendent_path" type="descendent_path"  indexed="true"  stored="true"/>
<fieldType name="descendent_path" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
</fieldType>

<dynamicField name="*_ancestor_path" type="ancestor_path"  indexed="true"  stored="true"/>
<fieldType name="ancestor_path" class="solr.TextField">
<analyzer type="index">
<tokenizer class="solr.KeywordTokenizerFactory" />
</analyzer>
<analyzer type="query">
<tokenizer class="solr.PathHierarchyTokenizerFactory" delimiter="/" />
</analyzer>
</fieldType>

<dynamicField name="*_point" type="point"  indexed="true"  stored="true"/>
<fieldType name="point" class="solr.PointType" dimension="2" subFieldSuffix="_d"/>

<!-- A specialized field for geospatial search filters and distance sorting. -->
<fieldType name="location" class="solr.LatLonPointSpatialField" docValues="true"/>

<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
geo="true" distErrPct="0.025" maxDistErr="0.001" distanceUnits="kilometers" />

<fieldType name="delimited_payloads_float" stored="false" indexed="true" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="float"/>
</analyzer>
</fieldType>
<fieldType name="delimited_payloads_int" stored="false" indexed="true" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="integer"/>
</analyzer>
</fieldType>
<fieldType name="delimited_payloads_string" stored="false" indexed="true" class="solr.TextField">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
<filter class="solr.DelimitedPayloadTokenFilterFactory" encoder="identity"/>
</analyzer>
</fieldType>

<!-- some examples for different languages (generally ordered by ISO code) -->

<!-- Arabic -->
<dynamicField name="*_txt_ar" type="text_ar"  indexed="true"  stored="true"/>
<fieldType name="text_ar" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- for any non-arabic -->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc -->
<filter class="solr.ArabicNormalizationFilterFactory"/>
<filter class="solr.ArabicStemFilterFactory"/>
</analyzer>
</fieldType>

<!-- Bulgarian -->
<dynamicField name="*_txt_bg" type="text_bg"  indexed="true"  stored="true"/>
<fieldType name="text_bg" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/> 
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" /> 
<filter class="solr.BulgarianStemFilterFactory"/>       
</analyzer>
</fieldType>

<!-- Catalan -->
<dynamicField name="*_txt_ca" type="text_ca"  indexed="true"  stored="true"/>
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>       
</analyzer>
</fieldType>

<!-- CJK bigram (see text_ja for a Japanese configuration using morphological analysis) -->
<dynamicField name="*_txt_cjk" type="text_cjk"  indexed="true"  stored="true"/>
<fieldType name="text_cjk" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- normalize width before bigram, as e.g. half-width dakuten combine  -->
<filter class="solr.CJKWidthFilterFactory"/>
<!-- for any non-CJK -->
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.CJKBigramFilterFactory"/>
</analyzer>
</fieldType>

<!-- Czech -->
<dynamicField name="*_txt_cz" type="text_cz"  indexed="true"  stored="true"/>
<fieldType name="text_cz" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
<filter class="solr.CzechStemFilterFactory"/>       
</analyzer>
</fieldType>

<!-- Danish -->
<dynamicField name="*_txt_da" type="text_da"  indexed="true"  stored="true"/>
<fieldType name="text_da" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Danish"/>       
</analyzer>
</fieldType>

<!-- German -->
<dynamicField name="*_txt_de" type="text_de"  indexed="true"  stored="true"/>
<fieldType name="text_de" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
<filter class="solr.GermanNormalizationFilterFactory"/>
<filter class="solr.GermanLightStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="German2"/> -->
</analyzer>
</fieldType>

<!-- Greek -->
<dynamicField name="*_txt_el" type="text_el"  indexed="true"  stored="true"/>
<fieldType name="text_el" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- greek specific lowercase for sigma -->
<filter class="solr.GreekLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
<filter class="solr.GreekStemFilterFactory"/>
</analyzer>
</fieldType>

<!-- Spanish -->
<dynamicField name="*_txt_es" type="text_es"  indexed="true"  stored="true"/>
<fieldType name="text_es" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
<filter class="solr.SpanishLightStemFilterFactory"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
</analyzer>
</fieldType>

<!-- Basque -->
<dynamicField name="*_txt_eu" type="text_eu"  indexed="true"  stored="true"/>
<fieldType name="text_eu" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
</analyzer>
</fieldType>

<!-- Persian -->
<dynamicField name="*_txt_fa" type="text_fa"  indexed="true"  stored="true"/>
<fieldType name="text_fa" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<!-- for ZWNJ -->
<charFilter class="solr.PersianCharFilterFactory"/>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ArabicNormalizationFilterFactory"/>
<filter class="solr.PersianNormalizationFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer>
</fieldType>

<!-- Finnish -->
<dynamicField name="*_txt_fi" type="text_fi"  indexed="true"  stored="true"/>
<fieldType name="text_fi" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
<!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
</analyzer>
</fieldType>

<!-- French -->
<dynamicField name="*_txt_fr" type="text_fr"  indexed="true"  stored="true"/>
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
<filter class="solr.FrenchLightStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
</analyzer>
</fieldType>

<!-- Irish -->
<dynamicField name="*_txt_ga" type="text_ga"  indexed="true"  stored="true"/>
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- removes d', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
<!-- removes n-, etc. position increments is intentionally false! -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter class="solr.IrishLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
</analyzer>
</fieldType>

<!-- Galician -->
<dynamicField name="*_txt_gl" type="text_gl"  indexed="true"  stored="true"/>
<fieldType name="text_gl" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
<filter class="solr.GalicianStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
</analyzer>
</fieldType>

<!-- Hindi -->
<dynamicField name="*_txt_hi" type="text_hi"  indexed="true"  stored="true"/>
<fieldType name="text_hi" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<!-- normalizes unicode representation -->
<filter class="solr.IndicNormalizationFilterFactory"/>
<!-- normalizes variation in spelling -->
<filter class="solr.HindiNormalizationFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
<filter class="solr.HindiStemFilterFactory"/>
</analyzer>
</fieldType>

<!-- Hungarian -->
<dynamicField name="*_txt_hu" type="text_hu"  indexed="true"  stored="true"/>
<fieldType name="text_hu" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
<!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->   
</analyzer>
</fieldType>

<!-- Armenian -->
<dynamicField name="*_txt_hy" type="text_hy"  indexed="true"  stored="true"/>
<fieldType name="text_hy" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
</analyzer>
</fieldType>

<!-- Indonesian -->
<dynamicField name="*_txt_id" type="text_id"  indexed="true"  stored="true"/>
<fieldType name="text_id" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
<!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
</analyzer>
</fieldType>

<!-- Italian -->
<dynamicField name="*_txt_it" type="text_it"  indexed="true"  stored="true"/>
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- removes l', etc -->
<filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
<filter class="solr.ItalianLightStemFilterFactory"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
</analyzer>
</fieldType>

<dynamicField name="*_txt_ja" type="text_ja"  indexed="true"  stored="true"/>
<fieldType name="text_ja" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="false">
<analyzer>

<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
<!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
<!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter class="solr.JapaneseBaseFormFilterFactory"/>
<!-- Removes tokens with certain part-of-speech tags -->
<filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
<!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter class="solr.CJKWidthFilterFactory"/>
<!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
<!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
<!-- Lower-cases romaji characters -->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>

<!-- Korean morphological analysis -->
<dynamicField name="*_txt_ko" type="text_ko"  indexed="true"  stored="true"/>
<fieldType name="text_ko" class="solr.TextField" positionIncrementGap="100">
<analyzer>

<tokenizer class="solr.KoreanTokenizerFactory" decompoundMode="discard" outputUnknownUnigrams="false"/>
<!-- Removes some part of speech stuff like EOMI (Pos.E), you can add a parameter 'tags',
listing the tags to remove. By default it removes: 
E, IC, J, MAG, MAJ, MM, SP, SSC, SSO, SC, SE, XPN, XSA, XSN, XSV, UNA, NA, VSV
This is basically an equivalent to stemming.
-->
<filter class="solr.KoreanPartOfSpeechStopFilterFactory" />
<!-- Replaces term text with the Hangul transcription of Hanja characters, if applicable: -->
<filter class="solr.KoreanReadingFormFilterFactory" />
<filter class="solr.LowerCaseFilterFactory" />
</analyzer>
</fieldType>

<!-- Latvian -->
<dynamicField name="*_txt_lv" type="text_lv"  indexed="true"  stored="true"/>
<fieldType name="text_lv" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
<filter class="solr.LatvianStemFilterFactory"/>
</analyzer>
</fieldType>

<!-- Dutch -->
<dynamicField name="*_txt_nl" type="text_nl"  indexed="true"  stored="true"/>
<fieldType name="text_nl" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
<filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
</analyzer>
</fieldType>

<!-- Norwegian -->
<dynamicField name="*_txt_no" type="text_no"  indexed="true"  stored="true"/>
<fieldType name="text_no" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
<!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory"/> -->
<!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory"/> -->
</analyzer>
</fieldType>

<!-- Portuguese -->
<dynamicField name="*_txt_pt" type="text_pt"  indexed="true"  stored="true"/>
<fieldType name="text_pt" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
<!-- most aggressive: <filter class="solr.PortugueseStemFilterFactory"/> -->
</analyzer>
</fieldType>

<!-- Romanian -->
<dynamicField name="*_txt_ro" type="text_ro"  indexed="true"  stored="true"/>
<fieldType name="text_ro" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
</analyzer>
</fieldType>

<!-- Russian -->
<dynamicField name="*_txt_ru" type="text_ru"  indexed="true"  stored="true"/>
<fieldType name="text_ru" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
<!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
</analyzer>
</fieldType>

<!-- Swedish -->
<dynamicField name="*_txt_sv" type="text_sv"  indexed="true"  stored="true"/>
<fieldType name="text_sv" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
<!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
</analyzer>
</fieldType>

<!-- Thai -->
<dynamicField name="*_txt_th" type="text_th"  indexed="true"  stored="true"/>
<fieldType name="text_th" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.ThaiTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer>
</fieldType>

<!-- Turkish -->
<dynamicField name="*_txt_tr" type="text_tr"  indexed="true"  stored="true"/>
<fieldType name="text_tr" class="solr.TextField" positionIncrementGap="100">
<analyzer> 
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/>
<filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
</analyzer>
</fieldType>

<fieldType name="text_ik_zw" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="org.apache.lucene.analysis.cn.smart.HMMChineseTokenizerFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="org.apache.lucene.analysis.cn.smart.HMMChineseTokenizerFactory"/>
</analyzer>
</fieldType>

<fieldType name="vertical_text" class="solr.TextField">
<analyzer>
<tokenizer class="com.trainning.project.custom.MyVerticalLineTokenizerFactory"/>
</analyzer>
</fieldType>

<fieldType name="comma_text" class="solr.TextField">
<analyzer>
<tokenizer class="com.trainning.project.custom.MyCommaTokenizerFactory"/>
</analyzer>
</fieldType>
 
</schema>
EOF;
    // 生成文件
    file_put_contents($path, $file);
    return true;
}
撰写回答
你尚未登录,登录后可以
  • 和开发者交流问题的细节
  • 关注并接收问题和回答的更新提醒
  • 参与内容的编辑和改进,让解决方法与时俱进
推荐问题