【刷题小结】2016及2013苏州大学复试上机

2016年保研上机题

题目

0. 请从服务器将两个数据文件input.txt和words.txt下载到本地电脑的D盘根文件夹。
1. 在D盘根文件夹的words.txt中存储了不超过30000条的英文单词，每个单词占一行。单词的最大长度为20，且单词内部没有空格，文件中无重复单词。
2. 在D盘根文件夹的input.txt中存储了一个“丢失”了空格和标点符号的英文文章。每行不超过128个字符，请编写程序把该文章中的第一行和最后一行。
4. 编写程序利用words.txt中的单词作为词典，采用正向最大匹配切分单词算法对input.txt中的文本进行单词切分。切分时单词区分大小写，切分分割标记采用空格，并将切分后的结果写入到out.txt中。
5. 编写程序实现步骤2、3描述的要求，并通过如下所示的主函数对进行验证，注意：除了指定添加的代码之外，不得修改main函数其余部分。对main函数每修改一处，总分扣3分，最多扣10分。
6. 本次考试考核C语言程序设计，因此不可以使用C++的STL的任何功能，如果需要添加下面样例之外的程序头文件，请举手得到监考老师批准。

分析

一直说苏大的nlp比较好，所以就出了一道分词。其实做下来这么些年的上机题，发现常出现的有两个：文件操作、素数和字典树。
这道题给了主函数，主函数给了一个数组给我存放字典，但我犹豫了许久还是在这个数组的基础上建立了字典树。虽然一开始对字典树的一些操作比较麻烦，还差一点就没能在两个小时内完成，但性能确实提高了不少。
程序大概分为如下几个部分：
（1）读取文件，显示第一行和最后一行。
（2）读取单词。
（3）根据读取的结果建立字典树。
（4）遍历字典树，进行单词划分。
另外：给的input.txt有非英文字符，需注意。
这里纠正一个错误，C语言不允许在结构体里面写函数（毛大佬教我的），这里的函数还是拿出来比较好。但是代码我就不改了。

代码

//姓名：
//学号:
//具体解决思路描述：

#include <time.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#define SIZE 30000

struct Trienode
{
	char c;
	Trienode * child[52]; // 前26个为小写，后26个为大写
	int scount;
	void initNode() {
		for (int i = 0; i < 52; i++) {
			child[i] = NULL;
		}
		scount = 0;
	}
};

void showFirstAndLastLine(char * filename);
int ReadWords(char * filename, char words[][24]);
Trienode* buildTrieTree(char words[][24], int count);
void divideWords(Trienode * head, char filename1[], char filename2[]);

int main()
{
	char words[SIZE][24];
	clock_t start, finish;
	start = clock();

	showFirstAndLastLine("d:\\input.txt");
	//将input文件的第一行和最后一行显示在屏幕上

	int count = ReadWords("d:\\words.txt", words);
	//读取英文字典，并返回单词数量

	//此处可以添加处理逻辑以实现题目的第3点要求
	//建议将需要的功能实现为多个函数后在此直接或者间接调用
	Trienode * head = buildTrieTree(words, count);
	divideWords(head, "d:\\input.txt", "d:\\out.txt");

	finish = clock();
	printf("Total time:%lf\n", (double)(finish - start) / CLOCKS_PER_SEC);

	return 0;
}

void showFirstAndLastLine(char * filename)
{
	char sen[128], temp[128];
	FILE *fp = fopen(filename, "r");
	int count = 0;
	if (fp == NULL) {
		printf("FILE OPEN ERROR!\n");
		exit(1);
	}

	while (fscanf(fp, "%s", sen) != EOF) {
		if (count == 0) {
			printf("First line:%s\n", sen);
		}
		strcpy(temp, sen);
		count++;
	}
	printf("Last line:%s\n", temp);

	fclose(fp);
}

int ReadWords(char * filename, char words[][24])
{
	int count = 0;
	char w[24];
	FILE *fp = fopen(filename, "r");
	if (fp == NULL) {
		printf("FILE OPEN ERROR!\n");
		exit(1);
	}

	while (fscanf(fp, "%s", w) != EOF&& count < SIZE) {
		strcpy(words[count], w);
		count++;
	}

	fclose(fp);
	return count;
}

Trienode* buildTrieTree(char words[][24], int count)
{
	Trienode * head = (Trienode *)malloc(sizeof(Trienode));
	Trienode * n, *p;
	head->initNode();
	for (int i = 0; i < count; i++) {
		int j = 0;
		p = head;
		while (words[i][j] != '\0') {
			if (words[i][j] >= 'a' && words[i][j] <= 'z') {
				if (p->child[words[i][j] - 'a'] == NULL) {
					n = (Trienode *)malloc(sizeof(Trienode));
					n->c = words[i][j];
					n->initNode();
					p->child[words[i][j] - 'a'] = n;
				}
				p = p->child[words[i][j] - 'a'];
			}
			else {
				if (p->child[words[i][j] - 'A' + 26] == NULL) {
					n = (Trienode *)malloc(sizeof(Trienode));
					n->c = words[i][j];
					n->initNode();
					p->child[words[i][j] - 'A' + 26] = n;
				}
				p = p->child[words[i][j] - 'A' + 26];
			}
			j++;
		}
		p->scount++;
	}

	return head;
}

void divideWords(Trienode * head, char filename1[], char filename2[])
{
	FILE *f1 = fopen(filename1, "r");
	FILE *f2 = fopen(filename2, "w");
	if (f1 == NULL || f2 == NULL) {
		printf("FILE OPEN ERROR!\n");
		exit(1);
	}

	char sen[128];
	Trienode *p, *t;
	int len, startlen;

	while (fscanf(f1, "%s", sen) != EOF) {
		len = strlen(sen);
		for (int i = 0; i < len; i++) {
			t = NULL;
			p = head;
			for (int j = i; j < len; j++) {
				if (sen[j] >= 'a' && sen[j] <= 'z') {
					if (p->child[sen[j] - 'a'] == NULL) {
						break;
					}
					else {
						p = p->child[sen[j] - 'a'];
						if (p->scount > 0) {
							t = p;
						}
					}
				}
				else{
					if (sen[j] >= 'A' && sen[j] <= 'Z') {
						if (p->child[sen[j] - 'A' + 26] == NULL) {
							break;
						}
						else {
							p = p->child[sen[j] - 'A' + 26];
							if (p->scount > 0) {
								t = p;
							}
						}
					}
				}
			}
			p = head;
			while (p != t) {
				if (sen[i] >= 'a' && sen[i] <= 'z') {
					p = p->child[sen[i] - 'a'];
					fprintf(f2, "%c", sen[i]);
				}
				else {
					if (sen[i] >= 'A' && sen[i] <= 'Z') {
						p = p->child[sen[i] - 'A' + 26];
						fprintf(f2, "%c", sen[i]);
					}
				}
				i++;
			}
			fputc(' ', f2);
			i--;
		}
	}

	fclose(f1);
	fclose(f2);
}

2016年复试上机题

题目

文本文件input.txt由若干英文单词和分隔符（空格，回车，换行）构成。根据如下说明编写程序统计不同单词出现的次数（频度）。将统计结果按出现频度从高到低排序，并将出现频度大于5的单词及其频度输出到文件output.txt中。
说明：
(1) 多个连续的分隔符被视为一个分隔符。
(2) 单词大小写敏感。
(3) 每个单词的长度不超过20个字符。
(4) 单词的数量未知。如使用定义静态大数组的方式来统计，将被扣除5分。

分析

这个比起上面的保研题目就简单了很多。程序大概分为三个部分：
（1）读文件，保存成字典树。
（2）创建链表，进行排序。
（3）输出结果。

代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct Trienode
{
	char c;
	Trienode * child[52];
	int scount;
	void initNode() {
		for (int i = 0; i < 52; i++) {
			child[i] = NULL;
		}
		scount = 0;
	}
};

struct Wordnode
{
	char wds[20];
	int wcount;
	Wordnode * next;
};

Trienode * fileRead(char filename[]);
void sortWord(Trienode * tnode, Wordnode * whead, char w[], int len);
void showData(Wordnode * whead, char filename[], int k);
void freeSpace(Trienode * tnode);
void freeSpace(Wordnode * whead);

int main()
{
	Trienode * thead = fileRead("d:\\input.txt");

	Wordnode * whead = (Wordnode *)malloc(sizeof(Wordnode));
	char w[20];
	memset(w, '\0', sizeof(w));
	whead->next = NULL;

	for (int i = 1; i < 52; i++) {
		sortWord(thead->child[i], whead, w, 0);
	}
	
	showData(whead, "d:\\out.txt", 5);
	// 释放空间
	freeSpace(thead);
	freeSpace(whead);

	return 0;
}

Trienode * fileRead(char filename[])
{
	
	FILE * fp = fopen(filename, "r");
	if (fp == NULL) {
		printf("FILE OPEN ERROR!\n");
		exit(1);
	}

	char word[20];
	Trienode * head = (Trienode *)malloc(sizeof(Trienode));
	Trienode * p, *n;
	head->initNode();

	while (fscanf(fp, "%s", word) != EOF) {
		p = head;
		for (int i = 0; word[i] != '\0'; i++) {
			if (word[i] >= 'a'&&word[i] <= 'z') {
				if (p->child[word[i] - 'a'] == NULL) {
					n = (Trienode *)malloc(sizeof(Trienode));
					n->c = word[i];
					n->initNode();
					p->child[word[i] - 'a'] = n;
				}
				p = p->child[word[i] - 'a'];
			}
			else {
				if (word[i] >= 'A'&&word[i] <= 'Z') {
					if (p->child[word[i] - 'A' + 26] == NULL) {
						n = (Trienode *)malloc(sizeof(Trienode));
						n->c = word[i];
						n->initNode();
						p->child[word[i] - 'A' + 26] = n;
					}
					p = p->child[word[i] - 'A' + 26];
				}
			}
		}
		p->scount++;
	}

	fclose(fp);

	return head;

}

void sortWord(Trienode * tnode, Wordnode * whead, char w[], int len)
{
	if (tnode == NULL) {
		return;
	}
	Wordnode *p, *q;
	w[len] = tnode->c;
	if (tnode->scount > 0) {
		q = whead;
		p = whead->next;

		while (p != NULL) {
			if (p->wcount < tnode->scount) {
				break;
			}
			q = p;
			p = p->next;
		}
		Wordnode *n = (Wordnode *)malloc(sizeof(Wordnode));
		strcpy(n->wds, w);
		n->wcount = tnode->scount;
		q->next = n;
		n->next = p;
	}

	for (int i = 0; i < 52; i++) {
		sortWord(tnode->child[i], whead, w, len + 1);
	}

	w[len] = '\0';
}

void showData(Wordnode * whead, char filename[], int k)
{
	FILE * fp = fopen(filename, "w");
	if (fp == NULL) {
		printf("FILE OPEN ERROR!\n");
		exit(0);
	}
	Wordnode *p = whead->next;
	while (p != NULL && p->wcount > 5) {
		fprintf(fp,"%s，%d\n",p->wds,p->wcount);
		p = p->next;
	}
	fclose(fp);
}

void freeSpace(Trienode * tnode)
{
	if (tnode == NULL) {
		return;
	}
	for (int i = 0; i < 52; i++) {
		freeSpace(tnode->child[i]);
	}
	free(tnode);
}

void freeSpace(Wordnode * whead)
{
	Wordnode * p = whead->next, *q = whead;
	while (p != NULL) {
		free(q);
		q = p;
		p = p->next;
	}
	free(q);
}

2013复试上机

题目

Introduction

The project will read flight data from an input file and flight path requests from another input file and output the required information.

Your Task

Your program should determine if a particular destination airport can be reached from a particular originating airport within a particular number of hops.
A hop (leg of a flight) is a flight from one airport to another on the path between an originating and destination airports.
For example, the flight plan from PVG to PEK might be PVG → CAN → PEK. So PVG → CAN would be a hop and CAN → PEK would be a hop.

Input Data Files

Path Input File(PathInput.txt)
This input file will consist of a number of single origination/destination airport pairs (direct flights). The first line of the file will contain an integer representing the total number of pairs in the rest of the file.
6
[PVG, CAN]
[CAN, PEK]
[PVG, CTU]
[CTU, DLC]
[DLC, HAK]
[HAK, LXA]

Path Request File(PathRequest.txt)
This input file will contain a sequence of pairs of origination/destination airports and a max number of hops. The first line of the file will contain an integer representing the number of pairs in the file.
2
[PVG, DLC, 2]
[PVG, LXA, 2]

Output File(Output.txt)
For each pair in the Path Request File, your program should output the pair followed by “YES” or “NO” indicating that it is possible to get from the origination to destination airports within the max number of hops or it is not possible, respectively.
[PVG, DLC, YES]
[PVG, LXA, NO]

Assumptions you can make:

You may make the following simplifying assumptions in your project:
C/C++ is allowed to be used.
All airport codes will be 3 letters and will be in all caps
Origination/destination pairs are unidirectional. To indicate that both directions of flight are possible, two entries would appear in the file. For example, [PVG, PEK] and [PEK, PVG] would have to be present in the file to indicate that one could fly from Shanghai to Beijing and from Beijing to Shanghai.

分析

这是一道英文题目，但是总体上说不难理解。其实就是给一组路径数据，然后求解在给定距离大小内是否能到达目标结点。这有些像图里面的求解单源点的最短路径，但是题目中的图是一张无权图，所以求解会更简单一点。
程序大概分成以下几个部分：
（1）读文件，构造邻接矩阵。还需要有额外的矩阵保存城市名称和城市数量。当然这里用邻接表也可以。说到邻接矩阵，其实邻接矩阵本身可以用来求解，即邻接矩阵的$n$次方的第$i$行第$j$列的数字就是结点$i$到结点$j$走$n$步可以用的方案数。按理说可以用这种方法求解，但是关于矩阵的计算和保存我还没有仔细想。
（2）读需求文件，邻接矩阵深度优先遍历求解。一般来说无向图求解最短路径是用宽度优先遍历，但是这我用的C，不想自己写队列的函数，还有这道题并不要求求出最短路径长度，所以我用了深度优先遍历。

代码

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define MAXCITY 100

int cityIndex(char city[][4], char name[], int citynum);
int pathRead(char filename[], int matrix[][100], char city[][4], int * citynum);
bool isAvailable(int matrix[][100], int m, int n, int hops, int citynum, int maxhops, int flag[]);
void requestRead(char infile[], char outfile[], int matrix[][100], char city[][4], int citynum);

void main()
{
	int matrix[MAXCITY][MAXCITY] = { 0 };//邻接矩阵
	char city[MAXCITY][4];
	int citynum = 0;
	int pathnum = pathRead("PathInput.txt", matrix, city, &citynum);
	requestRead("PathRequest.txt", "Out.txt", matrix, city, citynum);
}

int pathRead(char filename[], int matrix[][100], char city[][4], int * citynum)
{
	FILE * fp = fopen(filename, "r");
	if (fp == NULL) {
		printf("FILE OPEN ERROR!\n");
		exit(1);
	}

	int pathnum = 0;
	char cityname[4];
	char temp;
	int m, n;
	if (fscanf(fp, "%d", &pathnum) != EOF) {
		temp = fgetc(fp);
		for (int i = 0; i < pathnum; i++) {
			temp = fgetc(fp);
			fgets(cityname, 4, fp);
			if ((m = cityIndex(city, cityname, *citynum)) == -1) {
				strcpy(city[* citynum], cityname);
				m = *citynum;;
				*citynum = *citynum + 1;
			}
			temp = fgetc(fp);
			temp = fgetc(fp);
			fgets(cityname, 4, fp);
			temp = fgetc(fp);
			temp = fgetc(fp);
			if ((n = cityIndex(city, cityname, *citynum)) == -1) {
				strcpy(city[*citynum], cityname);
				n = *citynum;
				*citynum = *citynum + 1;
			}
			matrix[m][n] = 1;
		}
	}

	fclose(fp);

	return pathnum;
}

int cityIndex(char city[][4], char name[], int citynum)
{
	for (int i = 0; i < citynum; i++) {
		if (strcmp(city[i], name) == 0) {
			return i;
		}
	}
	return -1;
}

void requestRead(char infile[],char outfile[],int matrix[][100],char city[][4],int citynum)
{
	FILE * f1 = fopen(infile, "r");
	FILE * f2 = fopen(outfile, "w");
	if (f1 == NULL || f2 == NULL) {
		printf("FILE OPEN ERROR!\n");
		exit(1);
	}

	int requestnum = 0;
	char temp, cityname1[4], cityname2[4];
	int m, n, maxhops;
	int flag[MAXCITY];
	if (fscanf(f1, "%d", &requestnum) != EOF) {
		temp = fgetc(f1);
		for (int i = 0; i < requestnum; i++) {
			temp = fgetc(f1);
			fgets(cityname1, 4, f1);
			temp = fgetc(f1);
			temp = fgetc(f1);
			fgets(cityname2, 4, f1);
			temp = fgetc(f1);
			temp = fgetc(f1);
			fscanf(f1, "%d", &maxhops);
			temp = fgetc(f1);
			temp = fgetc(f1);

			if ((m = cityIndex(city, cityname1, citynum)) == -1 || 
            (n = cityIndex(city, cityname2, citynum)) == -1) {
				fprintf(f2, "[%s, %s, NO]\n", cityname1, cityname2);
			}
			else {
				memset(flag, '\0', sizeof(flag));
				if (isAvailable(matrix, m, n, 1, citynum, maxhops, flag)) {
					fprintf(f2, "[%s, %s, YES]\n", cityname1, cityname2);
				}
				else {
					fprintf(f2, "[%s, %s, NO]\n", cityname1, cityname2);
				}
			}
		}
	}

	fclose(f1);
	fclose(f2);
}

bool isAvailable(int matrix[][100], int m,int n,int hops,int citynum,int maxhops,int flag[])
{
	if (hops > maxhops) {
		return false;
	}
	if (matrix[m][n] == 1) {
		return true;
	}

	flag[m] = 1;
	for (int i = 0; i < citynum; i++) {
		if (matrix[m][i] == 1 && flag[i] == 0 && 
        isAvailable(matrix, i, n, hops + 1, citynum, maxhops, flag)) {
			return true;
		}
	}
	flag[m] = 0;
	return false;
}