web-dev-qa-db-ja.com

テキストファイルで特定の単語を見つけてカウントする

誰かがコードを手伝ってくれる?テキストファイル内の任意の単語を検索し、それが繰り返された数を数える方法は?

たとえばtest.txt:

hi
hola
hey
hi
bye
hoola
hi

たとえば、test.txtの中で単語「hi」が何回繰り返されているかを知りたい場合、プログラムは「3回繰り返されている」と出力する必要があります。

私が何を望んでいるか理解していただければ幸いです。回答ありがとうございます。

6
user1768236
/**
 * Counts how many whitespace-delimited tokens in {@code file} equal {@code Word},
 * ignoring case.
 *
 * @param Word the word to look for
 * @param file the text file to scan
 * @return the number of matching tokens; 0 when the file contains no tokens
 * @throws IllegalArgumentException if {@code file} cannot be opened
 */
public int countWord(String Word, File file) {
    int count = 0;
    // try-with-resources releases the underlying file handle on every path.
    try (Scanner scanner = new Scanner(file)) {
        // hasNext() is the guard that matches next(): hasNextLine() is also true for a
        // trailing line terminator, where next() would throw NoSuchElementException.
        while (scanner.hasNext()) {
            if (scanner.next().equalsIgnoreCase(Word)) {
                count++;
            }
        }
    } catch (java.io.FileNotFoundException e) {
        // Keep the signature free of checked exceptions while preserving the cause.
        throw new IllegalArgumentException("Cannot open file: " + file, e);
    }
    return count;
}
11
// Tally of how often each distinct line occurs: key = line text, value = occurrence count.
HashMap<String, Integer> h = new HashMap<String, Integer>();
// try-with-resources closes the reader (and the stream it wraps) even if reading fails.
try (BufferedReader br = new BufferedReader(
        new InputStreamReader(new FileInputStream("d:\\file.txt")))) {
    String n;
    while ((n = br.readLine()) != null) {
        // One lookup per line: a missing key means this is the first occurrence.
        Integer seen = h.get(n);
        h.put(n, seen == null ? 1 : seen + 1);
    }
}

このマップを反復処理すると、各単語(ファイルの各行)がキー、その出現回数が値として取得できます。

3
Bhavik Shah
/**
 * Demo that counts how many space-separated tokens of a fixed sentence are exactly
 * "is" and prints that number.
 */
public class Wordcount
{
    /**
     * Splits the sample sentence on single spaces, tallies the tokens equal to "is"
     * (case-sensitive) and prints the tally on its own line.
     *
     * @param args unused
     */
    public static void main(String[] args)
    {
        final String sentence = "hi this is is is line";
        int matches = 0;

        for (String token : sentence.split(" "))
        {
            if (token.equals("is"))
            {
                matches = matches + 1;
            }
        }

        System.out.println(matches);
    }
}
1
Jagan
package File1;

import java.io.BufferedReader;
import java.io.FileReader;

/**
 * Reads a text file and prints its contents joined onto one line, the total number of
 * words, how often each distinct word occurs (ignoring case) and the number of lines.
 */
public class CountLineWordsDuplicateWords {

/**
 * Reads {@code F:/Line.txt} and prints the word and line statistics to standard output.
 * Any exception raised while reading is reported with a stack trace, as before.
 *
 * @param args unused
 */
public static void main(String[] args) {
    int counLine = 0;
    StringBuilder joined = new StringBuilder();

    // try-with-resources closes both readers even when reading fails part-way.
    try (FileReader fr = new FileReader("F:/Line.txt");
         BufferedReader br = new BufferedReader(fr)) {
        String s;
        while ((s = br.readLine()) != null) {
            joined.append(s);
            joined.append(" ");/*Add space so words of adjacent lines stay separate*/
            counLine++;
        }
        String stringLine = joined.toString();
        System.out.println(stringLine);

        // Split on runs of whitespace so blank lines or repeated spaces do not produce
        // empty "words"; an empty file yields zero words instead of one empty token.
        String trimmed = stringLine.trim();
        String[] stringArray = trimmed.isEmpty() ? new String[0] : trimmed.split("\\s+");
        int arrayLength = stringArray.length;
        System.out.println("The number of Words is " + arrayLength);

        /*Duplicate String count code */
        // counted[k] marks a word already attributed to an earlier equal word. This
        // replaces the in-place array shifting, which shrank arrayLength inside the
        // shift loop and read one slot past the live range.
        boolean[] counted = new boolean[arrayLength];
        for (int i = 0; i < arrayLength; i++) {
            if (counted[i]) {
                continue;
            }
            int c = 1;
            for (int j = i + 1; j < arrayLength; j++) {
                if (!counted[j] && stringArray[i].equalsIgnoreCase(stringArray[j])) {
                    counted[j] = true;
                    c++;
                }
            }
            System.out.println("The " + stringArray[i] + " present " + c + " times .");
        }
        System.out.println("The number of Line is " + counLine);
        System.out.println();
    } catch (Exception e) {
        e.printStackTrace();
    }
}//End of main() method
}//End of class CountLineWordsDuplicateWords

Google GuavaライブラリのMultisetコレクションを使用します。

// Tally every line with a Guava Multiset, which keeps an occurrence count per distinct element.
Multiset<String> wordsMultiset = HashMultiset.create();
// NOTE(review): if fileName is a String, Scanner(String) scans that text itself instead of
// opening a file; pass a java.io.File or java.nio.file.Path to read from disk — confirm the type.
try (Scanner scanner = new Scanner(fileName)) {
    while (scanner.hasNextLine()) {
        wordsMultiset.add(scanner.nextLine());
    }
}
// Iterating the Multiset directly yields its String elements, not entries;
// entrySet() provides one Entry per distinct element together with its count.
for (Multiset.Entry<String> entry : wordsMultiset.entrySet()) {
    System.out.println("Word : " + entry.getElement() + " count -> " + entry.getCount());
}

Apache Commons-StringUtils.countMatches()

1
Andy
package somePackage;   
/**
 * Counts, ignoring case, the whitespace-separated words of a text file that start with
 * {@code testWord}, and prints the total as {@code "total is: N"}.
 *
 * <p>Only the first {@code testWord.length()} characters of each word are compared, so a
 * word with trailing characters (for example punctuation) after the search word still counts.
 * Any exception raised while reading is reported with a stack trace.
 *
 * @param args unused
 */
public static void main(String[] args) {

    String path = ""; //ADD YOUR PATH HERE
    String fileName = "test2.txt";
    String testWord = "Macbeth"; //CHANGE THIS IF YOU WANT
    int tLen = testWord.length();
    // Lower-cased once instead of once per line.
    String loweredTestWord = testWord.toLowerCase();
    int wordCntr = 0;
    String file = path + fileName;

    // try-with-resources closes the reader even when reading throws.
    try (BufferedReader br = new BufferedReader(
            new InputStreamReader(new FileInputStream(file)))) {
        String strLine;
        //Read File Line By Line
        while ((strLine = br.readLine()) != null) {
            // Cheap pre-check: skip lines that cannot contain testWord at all.
            if (!strLine.toLowerCase().contains(loweredTestWord)) {
                continue;
            }
            for (String w : strLine.split("\\s+")) {
                // Case-insensitive prefix comparison; false when w is shorter than testWord.
                if (w.regionMatches(true, 0, testWord, 0, tLen)) {
                    wordCntr++;
                }
            }
        }
        System.out.println("total is: " + wordCntr);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
1
Accribus
/**
 * Counts the case-sensitive occurrences of "Hi" in {@code Text} by removing them and
 * measuring how much shorter the text became.
 *
 * @return the number of removed characters divided by 2, the length of "Hi"
 */
public int occurrencesOfHi()
{
    final String withoutHi = Text.replace("Hi","");
    final int removedChars = Text.length() - withoutHi.length();
    return removedChars / 2;
}
0
John V

PatternとMatcherを使用して、この方法を試してください。

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Counts the matches of a regular expression in a text file and prints the total.
 */
public class Dem {

    /**
     * Reads {@code d://My.txt} line by line, counts every match of the pattern and
     * prints the count. Any exception is reported with a stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args){

        // NOTE(review): "it*" matches an 'i' followed by zero or more 't' characters, so every
        // 'i' counts; use e.g. "\\bit\\b" if only the whole word "it" is wanted — confirm intent.
        Pattern pat = Pattern.compile("it*");
        int count = 0;

        // try-with-resources closes the reader even when reading throws.
        try (BufferedReader br = new BufferedReader(new FileReader(new File("d://My.txt")))) {
            String s;
            // Match each line as it is read: the read loop ends with s == null, so the text
            // cannot be taken from s afterwards.
            while ((s = br.readLine()) != null) {
                Matcher mat = pat.matcher(s);
                // A single find() per iteration; a second call would skip every other match.
                while (mat.find()) {
                    count++;
                }
            }

            System.out.println(count);
        } catch (Exception e) {

            e.printStackTrace();
        }
    }

}
0
import java.io.*;
import java.util.*;

/**
 * Prompts for a search string and prints how many times it occurs in {@code c:/file.txt}.
 */
class filedemo
{
    /**
     * Reads one whitespace-delimited token from standard input, loads the file and prints
     * the number of occurrences of that token. Occurrences may overlap, because the search
     * resumes one character after the start of the previous match.
     *
     * @param ar unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String ar[]) throws Exception
    {
        // try-with-resources closes the file reader on every path.
        try (BufferedReader br = new BufferedReader(new FileReader("c:/file.txt"))) {
            System.out.println("enter the string which you search");
            // Not closed on purpose: closing this Scanner would close System.in.
            Scanner ob = new Scanner(System.in);
            String str = ob.next();

            StringBuilder contents = new StringBuilder();
            String str1;
            while ((str1 = br.readLine()) != null) {
                // Keep a line break between lines so a match cannot span two lines;
                // the search token never contains whitespace.
                contents.append(str1).append('\n');
            }
            String str2 = contents.toString();

            int count = 0;
            // indexOf with a start offset avoids copying the remaining text on every match.
            int index = str2.indexOf(str);
            while (index != -1) {
                count++;
                index = str2.indexOf(str, index + 1);
            }

            System.out.println("Number of the occures=" + count);
        }
    }
}
0
Sidharam Anache

テキストファイルを1行ずつ読み取ることができます。各行には複数のWordを含めることができると思います。各行について、次を呼び出します。

// Split the current line on single spaces and count each token that equals the
// searched word, ignoring case.
String[] words = line.split(" ");
for (String token : words) {
   if (token.equalsIgnoreCase(searhedWord)) {
         count++;
   }
}
0
dijkstra

java.util.Scannerを使用してみてください。

/**
 * Counts how many alphabetic tokens in the named file equal {@code w}, ignoring case.
 * Every character outside {@code a-z} / {@code A-Z} is treated as a delimiter.
 *
 * @param w        the word to look for
 * @param fileName path of the text file to scan
 * @return the number of matching tokens; 0 when the file contains no tokens
 * @throws IllegalArgumentException if the file cannot be opened
 */
public int countWords(String w, String fileName) {
    int count = 0;
    // Open the file named by the parameter; try-with-resources closes the scanner.
    try (Scanner scanner = new Scanner(new java.io.File(fileName))) {
        scanner.useDelimiter("[^a-zA-Z]"); // non alphabets act as delimiters
        // Visit every token, not just the first; hasNext() also guards an empty file.
        while (scanner.hasNext()) {
            String Word = scanner.next();
            if (Word.equalsIgnoreCase(w)) {
                count++;
            }
        }
    } catch (java.io.FileNotFoundException e) {
        // Keep the signature free of checked exceptions while preserving the cause.
        throw new IllegalArgumentException("Cannot open file: " + fileName, e);
    }
    return count;
}
0
Ravindra Bagale
package com.test;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Scanner;

/**
 * Reads a word from standard input and prints how many times it occurs as a whole
 * whitespace-separated word in {@code src/test.txt}.
 */
public  class Test {

    /**
     * Opens {@code src/test.txt}, reads one token from standard input and prints the
     * number of words in the file that equal it (case-sensitive).
     *
     * @param args unused
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args)  throws Exception{

        // try-with-resources closes the file reader on every path.
        try (BufferedReader bf = new BufferedReader(new FileReader("src/test.txt"))) {
            // Not closed on purpose: closing this Scanner would close System.in.
            Scanner sc = new Scanner(System.in);
            String W = sc.next();
            int count = 0;

            String line;
            // Test for null before using the line, so an empty file prints 0 instead of
            // failing with a NullPointerException.
            while ((line = bf.readLine()) != null) {
                for (String a : line.split("\\s+")) {
                    // Whole-word comparison: contains() would also count words that merely
                    // include the search text (e.g. "this" for "hi").
                    if (a.equals(W)) {
                        count++;
                    }
                }
            }
            System.out.println(count);
        }
    }

}
0
anshulkatta